Commit df37c4b9 authored by Didier Spezia's avatar Didier Spezia Committed by Rob Pike

cmd/asm: fix several panics with erroneous input

The parser tries to read as much information as possible,
issuing some errors when needed. Errors generally do not
stop the parsing.

With some pathological input, it may result in various
panics when the error message itself is built, or when the
next operand is parsed. It happens while parsing
pseudo-instructions.

For instance, the following lines all generate a panic:

	TEXT
	TEXT%
	TEXT 1,1
	TEXT $"toto", 0, $1
	FUNCDATA
	DATA 0
	DATA(0),1
	FUNCDATA(SB
	GLOBL 0, 1
	PCDATA 1

Added corresponding tests.

Introduced a writer in the parser to capture error messages
for testing purpose. It defaults to os.Stderr.

Added an explicit check when symbol names cannot be displayed.

Interrupted parsing early when the number of operands is wrong for
pseudo-instructions.

Note that the last point is a change of behavior, because some
operands will not get parsed anymore in case of early error.

IMO, it is acceptable, because only the first error of the line
is considered anyway. If it is not acceptable, it can probably
be improved at the price of a more verbose CL.

Fixes #11765
Fixes #11760
Fixes #11759

Change-Id: I9602a848132e358a1bccad794d7555e0823970dd
Reviewed-on: https://go-review.googlesource.com/13925Reviewed-by: default avatarRob Pike <r@golang.org>
parent dc3540d9
......@@ -66,10 +66,10 @@ func (p *Parser) append(prog *obj.Prog, cond string, doLabel bool) {
// validateSymbol checks that addr represents a valid name for a pseudo-op.
func (p *Parser) validateSymbol(pseudo string, addr *obj.Addr, offsetOk bool) {
if addr.Name != obj.NAME_EXTERN && addr.Name != obj.NAME_STATIC || addr.Scale != 0 || addr.Reg != 0 {
p.errorf("%s symbol %q must be a symbol(SB)", pseudo, addr.Sym.Name)
p.errorf("%s symbol %q must be a symbol(SB)", pseudo, symbolName(addr))
}
if !offsetOk && addr.Offset != 0 {
p.errorf("%s symbol %q must not be offset from SB", pseudo, addr.Sym.Name)
p.errorf("%s symbol %q must not be offset from SB", pseudo, symbolName(addr))
}
}
......@@ -91,6 +91,7 @@ func (p *Parser) validateImmediate(pseudo string, addr *obj.Addr) {
func (p *Parser) asmText(word string, operands [][]lex.Token) {
if len(operands) != 2 && len(operands) != 3 {
p.errorf("expect two or three operands for TEXT")
return
}
// Labels are function scoped. Patch existing labels and
......@@ -102,7 +103,7 @@ func (p *Parser) asmText(word string, operands [][]lex.Token) {
// That means symbol plus indirect on SB and no offset.
nameAddr := p.address(operands[0])
p.validateSymbol("TEXT", &nameAddr, false)
name := nameAddr.Sym.Name
name := symbolName(&nameAddr)
next := 1
// Next operand is the optional text flag, a literal integer.
......@@ -171,6 +172,7 @@ func (p *Parser) asmText(word string, operands [][]lex.Token) {
func (p *Parser) asmData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for DATA")
return
}
// Operand 0 has the general form foo<>+0x04(SB)/4.
......@@ -178,12 +180,13 @@ func (p *Parser) asmData(word string, operands [][]lex.Token) {
n := len(op)
if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int {
p.errorf("expect /size for DATA argument")
return
}
scale := p.parseScale(op[n-1].String())
op = op[:n-2]
nameAddr := p.address(op)
p.validateSymbol("DATA", &nameAddr, true)
name := nameAddr.Sym.Name
name := symbolName(&nameAddr)
// Operand 1 is an immediate constant or address.
valueAddr := p.address(operands[1])
......@@ -220,6 +223,7 @@ func (p *Parser) asmData(word string, operands [][]lex.Token) {
func (p *Parser) asmGlobl(word string, operands [][]lex.Token) {
if len(operands) != 2 && len(operands) != 3 {
p.errorf("expect two or three operands for GLOBL")
return
}
// Operand 0 has the general form foo<>+0x04(SB).
......@@ -257,6 +261,7 @@ func (p *Parser) asmGlobl(word string, operands [][]lex.Token) {
func (p *Parser) asmPCData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for PCDATA")
return
}
// Operand 0 must be an immediate constant.
......@@ -283,6 +288,7 @@ func (p *Parser) asmPCData(word string, operands [][]lex.Token) {
func (p *Parser) asmFuncData(word string, operands [][]lex.Token) {
if len(operands) != 2 {
p.errorf("expect two operands for FUNCDATA")
return
}
// Operand 0 must be an immediate constant.
......@@ -622,6 +628,14 @@ func newAddr(x obj.Addr) *obj.Addr {
return p
}
// symbolName returns the symbol name, or an error string if none if available.
func symbolName(addr *obj.Addr) string {
if addr.Sym != nil {
return addr.Sym.Name
}
return "<erroneous symbol>"
}
var emptyProg obj.Prog
// getConstantPseudo checks that addr represents a plain constant and returns its value.
......
......@@ -8,6 +8,7 @@ package asm
import (
"fmt"
"io"
"log"
"os"
"strconv"
......@@ -37,6 +38,7 @@ type Parser struct {
firstProg *obj.Prog
lastProg *obj.Prog
dataAddr map[string]int64 // Most recent address for DATA for this symbol.
errorWriter io.Writer
}
type Patch struct {
......@@ -46,11 +48,12 @@ type Patch struct {
func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
return &Parser{
ctxt: ctxt,
arch: ar,
lex: lexer,
labels: make(map[string]*obj.Prog),
dataAddr: make(map[string]int64),
ctxt: ctxt,
arch: ar,
lex: lexer,
labels: make(map[string]*obj.Prog),
dataAddr: make(map[string]int64),
errorWriter: os.Stderr,
}
}
......@@ -67,10 +70,12 @@ func (p *Parser) errorf(format string, args ...interface{}) {
return
}
p.errorLine = p.histLineNum
// Put file and line information on head of message.
format = "%s:%d: " + format + "\n"
args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
fmt.Fprintf(os.Stderr, format, args...)
if p.lex != nil {
// Put file and line information on head of message.
format = "%s:%d: " + format + "\n"
args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
}
fmt.Fprintf(p.errorWriter, format, args...)
p.errorCount++
if p.errorCount > 10 {
log.Fatal("too many errors")
......
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package asm
import (
"bytes"
"strings"
"testing"
"cmd/asm/internal/arch"
"cmd/asm/internal/lex"
)
func tokenize(s string) [][]lex.Token {
res := [][]lex.Token{}
if len(s) == 0 {
return res
}
for _, o := range strings.Split(s, ",") {
res = append(res, lex.Tokenize(o))
}
return res
}
func TestErroneous(t *testing.T) {
tests := []struct {
pseudo string
operands string
expected string
}{
{"TEXT", "", "expect two or three operands for TEXT"},
{"TEXT", "%", "expect two or three operands for TEXT"},
{"TEXT", "1, 1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
{"TEXT", "$\"foo\", 0, $1", "TEXT symbol \"<erroneous symbol>\" must be a symbol(SB)"},
{"FUNCDATA", "", "expect two operands for FUNCDATA"},
{"FUNCDATA", "(SB ", "expect two operands for FUNCDATA"},
{"DATA", "", "expect two operands for DATA"},
{"DATA", "0", "expect two operands for DATA"},
{"DATA", "(0), 1", "expect /size for DATA argument"},
{"GLOBL", "", "expect two or three operands for GLOBL"},
{"GLOBL", "0,1", "GLOBL symbol \"<erroneous symbol>\" must be a symbol(SB)"},
{"PCDATA", "", "expect two operands for PCDATA"},
{"PCDATA", "1", "expect two operands for PCDATA"},
}
// Note these errors should be independent of the architecture.
// Just run the test with amd64.
parser := newParser("amd64")
var buf bytes.Buffer
parser.errorWriter = &buf
for _, test := range tests {
parser.errorCount = 0
parser.lineNum++
parser.histLineNum++
op, ok := arch.Pseudos[test.pseudo]
if !ok {
t.Fatalf("Wrong pseudo-instruction: %s", test.pseudo)
}
parser.pseudo(op, test.pseudo, tokenize(test.operands))
errorLine := buf.String()
if test.expected != errorLine {
t.Errorf("Unexpected error %q; expected %q", errorLine, test.expected)
}
buf.Reset()
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment