Commit d17727bd authored by Matthew Dempsky

cmd/compile: cleanup escape sequence lexing

Change-Id: I7fe4d0cdcc284d5319c130ee3c351f23489af273
Reviewed-on: https://go-review.googlesource.com/19902
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 58887634
...@@ -1469,13 +1469,15 @@ func (l *lexer) stdString() { ...@@ -1469,13 +1469,15 @@ func (l *lexer) stdString() {
cp := &strbuf cp := &strbuf
cp.Reset() cp.Reset()
var escflag int for {
var v int64 r, b, ok := l.onechar('"')
for !l.escchar('"', &escflag, &v) { if !ok {
if v < utf8.RuneSelf || escflag != 0 { break
cp.WriteByte(byte(v)) }
if r == 0 {
cp.WriteByte(b)
} else { } else {
cp.WriteRune(rune(v)) cp.WriteRune(r)
} }
} }
...@@ -1520,21 +1522,23 @@ func (l *lexer) rawString() { ...@@ -1520,21 +1522,23 @@ func (l *lexer) rawString() {
} }
func (l *lexer) rune() { func (l *lexer) rune() {
var escflag int r, b, ok := l.onechar('\'')
var v int64 if !ok {
if l.escchar('\'', &escflag, &v) {
Yyerror("empty character literal or unescaped ' in character literal") Yyerror("empty character literal or unescaped ' in character literal")
v = '\'' r = '\''
}
if r == 0 {
r = rune(b)
} }
if !l.escchar('\'', &escflag, &v) { if c := l.getr(); c != '\'' {
Yyerror("missing '") Yyerror("missing '")
l.ungetr(rune(v)) l.ungetr(c)
} }
x := new(Mpint) x := new(Mpint)
l.val.U = x l.val.U = x
Mpmovecfix(x, v) Mpmovecfix(x, int64(r))
x.Rune = true x.Rune = true
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: codepoint literal\n") fmt.Printf("lex: codepoint literal\n")
...@@ -1889,63 +1893,48 @@ func (l *lexer) ungetr(r rune) { ...@@ -1889,63 +1893,48 @@ func (l *lexer) ungetr(r rune) {
} }
} }
func (l *lexer) escchar(e rune, escflg *int, val *int64) bool { // onechar lexes a single character within a rune or interpreted string literal,
*escflg = 0 // handling escape sequences as necessary.
func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) {
c := l.getr() c := l.getr()
switch c { switch c {
case EOF: case EOF:
Yyerror("eof in string") Yyerror("eof in string")
return true l.ungetr(EOF)
return
case '\n': case '\n':
Yyerror("newline in string") Yyerror("newline in string")
return true l.ungetr('\n')
return
case '\\': case '\\':
break break
case quote:
return
default: default:
if c == e { return c, 0, true
return true
}
*val = int64(c)
return false
} }
u := 0
c = l.getr() c = l.getr()
var i int
switch c { switch c {
case 'x': case 'x':
*escflg = 1 // it's a byte return 0, byte(l.hexchar(2)), true
i = 2
goto hex
case 'u': case 'u':
i = 4 return l.unichar(4), 0, true
u = 1
goto hex
case 'U': case 'U':
i = 8 return l.unichar(8), 0, true
u = 1
goto hex case '0', '1', '2', '3', '4', '5', '6', '7':
x := c - '0'
case '0',
'1',
'2',
'3',
'4',
'5',
'6',
'7':
*escflg = 1 // it's a byte
x := int64(c) - '0'
for i := 2; i > 0; i-- { for i := 2; i > 0; i-- {
c = l.getr() c = l.getr()
if c >= '0' && c <= '7' { if c >= '0' && c <= '7' {
x = x*8 + int64(c) - '0' x = x*8 + c - '0'
continue continue
} }
...@@ -1957,8 +1946,7 @@ func (l *lexer) escchar(e rune, escflg *int, val *int64) bool { ...@@ -1957,8 +1946,7 @@ func (l *lexer) escchar(e rune, escflg *int, val *int64) bool {
Yyerror("octal escape value > 255: %d", x) Yyerror("octal escape value > 255: %d", x)
} }
*val = x return 0, byte(x), true
return false
case 'a': case 'a':
c = '\a' c = '\a'
...@@ -1978,45 +1966,44 @@ func (l *lexer) escchar(e rune, escflg *int, val *int64) bool { ...@@ -1978,45 +1966,44 @@ func (l *lexer) escchar(e rune, escflg *int, val *int64) bool {
c = '\\' c = '\\'
default: default:
if c != e { if c != quote {
Yyerror("unknown escape sequence: %c", c) Yyerror("unknown escape sequence: %c", c)
} }
} }
*val = int64(c) return c, 0, true
return false }
hex:
x := int64(0)
for ; i > 0; i-- {
c = l.getr()
if c >= '0' && c <= '9' {
x = x*16 + int64(c) - '0'
continue
}
if c >= 'a' && c <= 'f' {
x = x*16 + int64(c) - 'a' + 10
continue
}
if c >= 'A' && c <= 'F' { func (l *lexer) unichar(n int) rune {
x = x*16 + int64(c) - 'A' + 10 x := l.hexchar(n)
continue if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 {
Yyerror("invalid Unicode code point in escape sequence: %#x", x)
x = utf8.RuneError
} }
return rune(x)
}
func (l *lexer) hexchar(n int) uint32 {
var x uint32
for ; n > 0; n-- {
var d uint32
switch c := l.getr(); {
case isDigit(c):
d = uint32(c - '0')
case 'a' <= c && c <= 'f':
d = uint32(c - 'a' + 10)
case 'A' <= c && c <= 'F':
d = uint32(c - 'A' + 10)
default:
Yyerror("non-hex character in escape sequence: %c", c) Yyerror("non-hex character in escape sequence: %c", c)
l.ungetr(c) l.ungetr(c)
break return x
} }
x = x*16 + d
if u != 0 && (x > utf8.MaxRune || (0xd800 <= x && x < 0xe000)) {
Yyerror("invalid Unicode code point in escape sequence: %#x", x)
x = utf8.RuneError
} }
*val = x return x
return false
} }
var syms = []struct { var syms = []struct {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment