Commit 8dd2ce2b authored by Robert Griesemer's avatar Robert Griesemer

cmd/compile: factor our literal lexing from main lexer function

Further reduces complexity of lexer.next which is now readable.
Also removes the need to initialize various local variables in
each next call even if they are not used for the current token.

No measurable performance change for `time go build -a net/http`
(best of 5 runs): difference < 0.3% (in the noise).

Change-Id: I0d74caa2768920af1ceee027e0f46595119d4210
Reviewed-on: https://go-review.googlesource.com/19865
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarMatthew Dempsky <mdempsky@google.com>
parent 38d4511b
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
package gc package gc
import ( import (
"bytes"
"cmd/internal/obj" "cmd/internal/obj"
"flag" "flag"
"fmt" "fmt"
...@@ -922,14 +921,6 @@ const ( ...@@ -922,14 +921,6 @@ const (
) )
func (l *lexer) next() { func (l *lexer) next() {
var c1 rune
var op Op
var escflag int
var v int64
var cp *bytes.Buffer
var s *Sym
var str string
prevlineno = lineno prevlineno = lineno
nlsemi := l.nlsemi nlsemi := l.nlsemi
...@@ -958,132 +949,15 @@ l0: ...@@ -958,132 +949,15 @@ l0:
// identifiers and keywords // identifiers and keywords
// (for better error messages consume all chars >= utf8.RuneSelf for identifiers) // (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
if isLetter(c) || c >= utf8.RuneSelf { if isLetter(c) || c >= utf8.RuneSelf {
cp = &lexbuf l.ident(c)
cp.Reset() if l.tok == LIGNORE {
// accelerate common case (7bit ASCII)
for isLetter(c) || isDigit(c) {
cp.WriteByte(byte(c))
c = l.getr()
}
// general case
for {
if c >= utf8.RuneSelf {
if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
if cp.Len() == 0 && unicode.IsDigit(c) {
Yyerror("identifier cannot begin with digit %#U", c)
}
} else {
Yyerror("invalid identifier character %#U", c)
}
cp.WriteRune(c)
} else if isLetter(c) || isDigit(c) {
cp.WriteByte(byte(c))
} else {
break
}
c = l.getr()
}
cp = nil
l.ungetr(c)
s = LookupBytes(lexbuf.Bytes())
if s.Lexical == LIGNORE {
goto l0 goto l0
} }
if Debug['x'] != 0 {
fmt.Printf("lex: %s %s\n", s, lexname(rune(s.Lexical)))
}
l.sym_ = s
switch s.Lexical {
case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
l.nlsemi = true
}
l.tok = int32(s.Lexical)
return return
} }
if isDigit(c) { var c1 rune
cp = &lexbuf var op Op
cp.Reset()
if c != '0' {
for {
cp.WriteByte(byte(c))
c = l.getr()
if isDigit(c) {
continue
}
if c == '.' {
goto casedot
}
if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
goto caseep
}
if c == 'i' {
goto casei
}
goto ncu
}
}
cp.WriteByte(byte(c))
c = l.getr()
if c == 'x' || c == 'X' {
for {
cp.WriteByte(byte(c))
c = l.getr()
if isDigit(c) {
continue
}
if c >= 'a' && c <= 'f' {
continue
}
if c >= 'A' && c <= 'F' {
continue
}
if lexbuf.Len() == 2 {
Yyerror("malformed hex constant")
}
if c == 'p' {
goto caseep
}
goto ncu
}
}
if c == 'p' { // 0p begins floating point zero
goto caseep
}
c1 = 0
for {
if !isDigit(c) {
break
}
if c < '0' || c > '7' {
c1 = 1 // not octal
}
cp.WriteByte(byte(c))
c = l.getr()
}
if c == '.' {
goto casedot
}
if c == 'e' || c == 'E' {
goto caseep
}
if c == 'i' {
goto casei
}
if c1 != 0 {
Yyerror("malformed octal constant")
}
goto ncu
}
switch c { switch c {
case EOF: case EOF:
...@@ -1100,14 +974,16 @@ l0: ...@@ -1100,14 +974,16 @@ l0:
l.tok = -1 l.tok = -1
return return
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
l.number(c)
return
case '.': case '.':
c1 = l.getr() c1 = l.getr()
if isDigit(c1) { if isDigit(c1) {
cp = &lexbuf l.ungetr(c1)
cp.Reset() l.number('.')
cp.WriteByte(byte(c)) return
c = c1
goto casedot
} }
if c1 == '.' { if c1 == '.' {
...@@ -1121,75 +997,16 @@ l0: ...@@ -1121,75 +997,16 @@ l0:
c1 = '.' c1 = '.'
} }
// "..."
case '"': case '"':
lexbuf.Reset() l.stdString()
lexbuf.WriteString(`"<string>"`) return
cp = &strbuf
cp.Reset()
for {
if l.escchar('"', &escflag, &v) {
break
}
if v < utf8.RuneSelf || escflag != 0 {
cp.WriteByte(byte(v))
} else {
cp.WriteRune(rune(v))
}
}
goto strlit
// `...`
case '`': case '`':
lexbuf.Reset() l.rawString()
lexbuf.WriteString("`<string>`") return
cp = &strbuf
cp.Reset()
for {
c = l.getr()
if c == '\r' {
continue
}
if c == EOF {
Yyerror("eof in string")
break
}
if c == '`' {
break
}
cp.WriteRune(c)
}
goto strlit
// '.'
case '\'': case '\'':
if l.escchar('\'', &escflag, &v) { l.rune()
Yyerror("empty character literal or unescaped ' in character literal")
v = '\''
}
if !l.escchar('\'', &escflag, &v) {
Yyerror("missing '")
l.ungetr(rune(v))
}
x := new(Mpint)
l.val.U = x
Mpmovecfix(x, v)
x.Rune = true
if Debug['x'] != 0 {
fmt.Printf("lex: codepoint literal\n")
}
litbuf = "rune literal"
l.nlsemi = true
l.tok = LLITERAL
return return
case '/': case '/':
...@@ -1426,45 +1243,144 @@ asop: ...@@ -1426,45 +1243,144 @@ asop:
fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op]) fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
} }
l.tok = LASOP l.tok = LASOP
return }
func (l *lexer) ident(c rune) {
cp := &lexbuf
cp.Reset()
// accelerate common case (7bit ASCII)
for isLetter(c) || isDigit(c) {
cp.WriteByte(byte(c))
c = l.getr()
}
// general case
for {
if c >= utf8.RuneSelf {
if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
if cp.Len() == 0 && unicode.IsDigit(c) {
Yyerror("identifier cannot begin with digit %#U", c)
}
} else {
Yyerror("invalid identifier character %#U", c)
}
cp.WriteRune(c)
} else if isLetter(c) || isDigit(c) {
cp.WriteByte(byte(c))
} else {
break
}
c = l.getr()
}
ncu:
cp = nil cp = nil
l.ungetr(c) l.ungetr(c)
str = lexbuf.String() s := LookupBytes(lexbuf.Bytes())
l.val.U = new(Mpint) if Debug['x'] != 0 {
mpatofix(l.val.U.(*Mpint), str) fmt.Printf("lex: %s %s\n", s, lexname(rune(s.Lexical)))
if l.val.U.(*Mpint).Ovf {
Yyerror("overflow in constant")
Mpmovecfix(l.val.U.(*Mpint), 0)
} }
l.sym_ = s
switch s.Lexical {
case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
l.nlsemi = true
}
l.tok = int32(s.Lexical)
}
if Debug['x'] != 0 { func (l *lexer) number(c rune) {
fmt.Printf("lex: integer literal\n") // TODO(gri) this can be done nicely with fewer or even without labels
var str string
cp := &lexbuf
cp.Reset()
if c != '.' {
if c != '0' {
for isDigit(c) {
cp.WriteByte(byte(c))
c = l.getr()
}
if c == '.' {
goto casedot
}
if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
goto caseep
}
if c == 'i' {
goto casei
}
goto ncu
}
// c == 0
cp.WriteByte('0')
c = l.getr()
if c == 'x' || c == 'X' {
cp.WriteByte(byte(c))
c = l.getr()
for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
cp.WriteByte(byte(c))
c = l.getr()
}
if lexbuf.Len() == 2 {
Yyerror("malformed hex constant")
}
if c == 'p' {
goto caseep
}
goto ncu
}
if c == 'p' { // 0p begins floating point zero
goto caseep
}
has8or9 := false
for isDigit(c) {
if c > '7' {
has8or9 = true
}
cp.WriteByte(byte(c))
c = l.getr()
}
if c == '.' {
goto casedot
}
if c == 'e' || c == 'E' {
goto caseep
}
if c == 'i' {
goto casei
}
if has8or9 {
Yyerror("malformed octal constant")
}
goto ncu
} }
litbuf = "literal " + str
l.nlsemi = true
l.tok = LLITERAL
return
casedot: casedot:
for { // fraction
// c == '.'
cp.WriteByte('.')
c = l.getr()
for isDigit(c) {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getr() c = l.getr()
if !isDigit(c) {
break
}
} }
if c == 'i' { if c == 'i' {
goto casei goto casei
} }
if c != 'e' && c != 'E' { if c != 'e' && c != 'E' {
goto caseout goto caseout
} }
// base-2-exponents (p or P) don't appear in numbers
// with fractions - ok to not test for 'p' or 'P'
// above
caseep: caseep:
// exponent
if importpkg == nil && (c == 'p' || c == 'P') { if importpkg == nil && (c == 'p' || c == 'P') {
// <mantissa>p<base-2-exponent> is allowed in .a/.o imports, // <mantissa>p<base-2-exponent> is allowed in .a/.o imports,
// but not in .go sources. See #9036. // but not in .go sources. See #9036.
...@@ -1485,13 +1401,12 @@ caseep: ...@@ -1485,13 +1401,12 @@ caseep:
c = l.getr() c = l.getr()
} }
if c == 'i' { if c != 'i' {
goto casei goto caseout
} }
goto caseout
// imaginary constant
casei: casei:
// imaginary constant
cp = nil cp = nil
str = lexbuf.String() str = lexbuf.String()
...@@ -1506,10 +1421,7 @@ casei: ...@@ -1506,10 +1421,7 @@ casei:
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: imaginary literal\n") fmt.Printf("lex: imaginary literal\n")
} }
litbuf = "literal " + str goto done
l.nlsemi = true
l.tok = LLITERAL
return
caseout: caseout:
cp = nil cp = nil
...@@ -1526,12 +1438,47 @@ caseout: ...@@ -1526,12 +1438,47 @@ caseout:
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: floating literal\n") fmt.Printf("lex: floating literal\n")
} }
goto done
ncu:
cp = nil
l.ungetr(c)
str = lexbuf.String()
l.val.U = new(Mpint)
mpatofix(l.val.U.(*Mpint), str)
if l.val.U.(*Mpint).Ovf {
Yyerror("overflow in constant")
Mpmovecfix(l.val.U.(*Mpint), 0)
}
if Debug['x'] != 0 {
fmt.Printf("lex: integer literal\n")
}
done:
litbuf = "literal " + str litbuf = "literal " + str
l.nlsemi = true l.nlsemi = true
l.tok = LLITERAL l.tok = LLITERAL
return }
func (l *lexer) stdString() {
lexbuf.Reset()
lexbuf.WriteString(`"<string>"`)
cp := &strbuf
cp.Reset()
var escflag int
var v int64
for !l.escchar('"', &escflag, &v) {
if v < utf8.RuneSelf || escflag != 0 {
cp.WriteByte(byte(v))
} else {
cp.WriteRune(rune(v))
}
}
strlit:
l.val.U = internString(cp.Bytes()) l.val.U = internString(cp.Bytes())
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: string literal\n") fmt.Printf("lex: string literal\n")
...@@ -1541,6 +1488,62 @@ strlit: ...@@ -1541,6 +1488,62 @@ strlit:
l.tok = LLITERAL l.tok = LLITERAL
} }
func (l *lexer) rawString() {
lexbuf.Reset()
lexbuf.WriteString("`<string>`")
cp := &strbuf
cp.Reset()
for {
c := l.getr()
if c == '\r' {
continue
}
if c == EOF {
Yyerror("eof in string")
break
}
if c == '`' {
break
}
cp.WriteRune(c)
}
l.val.U = internString(cp.Bytes())
if Debug['x'] != 0 {
fmt.Printf("lex: string literal\n")
}
litbuf = "string literal"
l.nlsemi = true
l.tok = LLITERAL
}
func (l *lexer) rune() {
var escflag int
var v int64
if l.escchar('\'', &escflag, &v) {
Yyerror("empty character literal or unescaped ' in character literal")
v = '\''
}
if !l.escchar('\'', &escflag, &v) {
Yyerror("missing '")
l.ungetr(rune(v))
}
x := new(Mpint)
l.val.U = x
Mpmovecfix(x, v)
x.Rune = true
if Debug['x'] != 0 {
fmt.Printf("lex: codepoint literal\n")
}
litbuf = "rune literal"
l.nlsemi = true
l.tok = LLITERAL
}
var internedStrings = map[string]string{} var internedStrings = map[string]string{}
func internString(b []byte) string { func internString(b []byte) string {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment