Commit 0442087f authored by Gustavo Niemeyer

encoding/xml: bring API closer to other packages

Includes gofix module. The only case not covered should be
xml.Unmarshal, since it remains with a similar interface, and
would require introspecting the type of its first argument
better.

Fixes #2626.

R=golang-dev, rsc, gustavo
CC=golang-dev
https://golang.org/cl/5574053
parent 6d7e9382
...@@ -587,7 +587,7 @@ func commitPoll(key, pkg string) { ...@@ -587,7 +587,7 @@ func commitPoll(key, pkg string) {
var logStruct struct { var logStruct struct {
Log []HgLog Log []HgLog
} }
err = xml.Unmarshal(strings.NewReader("<Top>"+data+"</Top>"), &logStruct) err = xml.Unmarshal([]byte("<Top>"+data+"</Top>"), &logStruct)
if err != nil { if err != nil {
log.Printf("unmarshal hg log: %v", err) log.Printf("unmarshal hg log: %v", err)
return return
......
...@@ -115,9 +115,9 @@ func loadCodewalk(filename string) (*Codewalk, error) { ...@@ -115,9 +115,9 @@ func loadCodewalk(filename string) (*Codewalk, error) {
} }
defer f.Close() defer f.Close()
cw := new(Codewalk) cw := new(Codewalk)
p := xml.NewParser(f) d := xml.NewDecoder(f)
p.Entity = xml.HTMLEntity d.Entity = xml.HTMLEntity
err = p.Unmarshal(cw, nil) err = d.Decode(cw)
if err != nil { if err != nil {
return nil, &os.PathError{"parsing", filename, err} return nil, &os.PathError{"parsing", filename, err}
} }
......
...@@ -42,6 +42,7 @@ GOFILES=\ ...@@ -42,6 +42,7 @@ GOFILES=\
timefileinfo.go\ timefileinfo.go\
typecheck.go\ typecheck.go\
url.go\ url.go\
xmlapi.go\
include ../../Make.cmd include ../../Make.cmd
......
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"go/ast"
)
// init hands xmlapiFix to register when the package is initialized.
func init() {
register(xmlapiFix)
}
// xmlapiFix is the fix value passed to register for the xmlapi rewrite.
// The positional fields are the string "xmlapi", the date string
// "2012-01-23", the xmlapi rewrite function, and a short description
// that links to the code review for the encoding/xml API change.
// NOTE(review): the fix struct is declared elsewhere; the fields are
// presumably name, date, function and description, in that order.
var xmlapiFix = fix{
"xmlapi",
"2012-01-23",
xmlapi,
`
Make encoding/xml's API look more like the rest of the encoding packages.
http://codereview.appspot.com/5574053
`,
}
// xmlapiTypeConfig is the type configuration xmlapi passes to typecheck.
// It associates the function xml.NewParser with the type string
// "xml.Parser", which is the string xmlapi compares against when it
// looks for Unmarshal method calls on a parser value.
// NOTE(review): Func presumably maps a function name to the type of its
// result; confirm against the TypeConfig declaration.
var xmlapiTypeConfig = &TypeConfig{
Func: map[string]string{
"xml.NewParser": "xml.Parser",
},
}
// xmlapi rewrites uses of the old encoding/xml API in f to the
// Encoder/Decoder based API and reports whether it changed anything.
// Files that do not import "encoding/xml" are left alone.
//
// The rewrites performed are:
//
//	xml.Marshal(w, v)     -> xml.NewEncoder(w).Encode(v)
//	xml.NewParser(r)      -> xml.NewDecoder(r)
//	xml.Parser            -> xml.Decoder
//	p.Unmarshal(v, start) -> p.DecodeElement(v, start)
//
// The last rewrite applies when the type recorded for p is "xml.Parser"
// or "*xml.Parser". Calls to the package-level xml.Unmarshal are not
// rewritten; see the note in the call switch below.
func xmlapi(f *ast.File) bool {
	if !imports(f, "encoding/xml") {
		return false
	}

	// Type information is gathered before any node is renamed, so the
	// comparisons below are always made against the old type names.
	typeof, _ := typecheck(xmlapiTypeConfig, f)

	fixed := false
	walk(f, func(n interface{}) {
		switch n := n.(type) {
		case *ast.SelectorExpr:
			recv := typeof[n.X]
			switch {
			// Accept the pointer spelling as well as the value one:
			// the xml.Parser -> xml.Decoder rename below also fires
			// inside a "*xml.Parser" type expression, so skipping the
			// pointer case here would leave p.Unmarshal referring to
			// a method the new Decoder type does not have.
			// NOTE(review): assumes typecheck records pointer-typed
			// values as "*xml.Parser" - confirm against typecheck.
			case n.Sel.Name == "Unmarshal" && (recv == "xml.Parser" || recv == "*xml.Parser"):
				n.Sel.Name = "DecodeElement"
				fixed = true
			case isPkgDot(n, "xml", "Parser"):
				n.Sel.Name = "Decoder"
				fixed = true
			}

		case *ast.CallExpr:
			switch {
			case len(n.Args) == 2 && isPkgDot(n.Fun, "xml", "Marshal"):
				// Overwrite the call node in place with the chained form.
				*n = xmlMarshal(n.Args)
				fixed = true
			// Can't fix without further diving into the type of n.Args[0].
			//case len(n.Args) == 2 && isPkgDot(n.Fun, "xml", "Unmarshal"):
			//	*n = xmlUnmarshal(n.Args)
			//	fixed = true
			case len(n.Args) == 1 && isPkgDot(n.Fun, "xml", "NewParser"):
				// Only the selector name changes; the argument is kept.
				n.Fun.(*ast.SelectorExpr).Sel.Name = "NewDecoder"
				fixed = true
			}
		}
	})
	return fixed
}
// xmlMarshal returns the call expression
// xml.NewEncoder(args[0]).Encode(args[1]), the replacement xmlapi uses
// for a two-argument xml.Marshal call. It is built by xmlCallChain, which
// slices args, so args must hold at least two expressions.
func xmlMarshal(args []ast.Expr) ast.CallExpr {
return xmlCallChain("NewEncoder", "Encode", args)
}
// xmlUnmarshal returns the call expression
// xml.NewDecoder(args[0]).Decode(args[1]). In the visible code it is
// referenced only from the commented-out xml.Unmarshal case in xmlapi.
// It is built by xmlCallChain, which slices args, so args must hold at
// least two expressions.
func xmlUnmarshal(args []ast.Expr) ast.CallExpr {
return xmlCallChain("NewDecoder", "Decode", args)
}
// xmlCallChain builds the chained call expression
//
//	xml.<first>(args[0]).<second>(args[1])
//
// and returns it by value. Only the first two elements of args are used;
// both argument lists are sub-slices of args, not copies. args must hold
// at least two expressions, otherwise the slicing panics.
func xmlCallChain(first, second string, args []ast.Expr) ast.CallExpr {
	// xml.<first>
	constructor := &ast.SelectorExpr{
		X:   ast.NewIdent("xml"),
		Sel: ast.NewIdent(first),
	}

	// xml.<first>(args[0])
	receiver := &ast.CallExpr{
		Fun:  constructor,
		Args: args[:1],
	}

	// xml.<first>(args[0]).<second>
	method := &ast.SelectorExpr{
		X:   receiver,
		Sel: ast.NewIdent(second),
	}

	var chain ast.CallExpr
	chain.Fun = method
	chain.Args = args[1:2]
	return chain
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
// init passes the xmlapiTests table and the xmlapi rewrite function to
// addTestCases when the package is initialized.
func init() {
addTestCases(xmlapiTests, xmlapi)
}
// xmlapiTests lists input programs (In) and the expected output (Out)
// for the xmlapi rewrite.
//
// Case xmlapi.0 expects that:
//   - xml.Marshal(a, b) becomes xml.NewEncoder(a).Encode(b);
//   - the package-level xml.Unmarshal(a, b) call is left unchanged;
//   - xml.NewParser becomes xml.NewDecoder and the type xml.Parser
//     becomes xml.Decoder;
//   - Unmarshal method calls on a value obtained from xml.NewParser (p1)
//     and on a variable declared as xml.Parser (p2) become DecodeElement.
var xmlapiTests = []testCase{
{
Name: "xmlapi.0",
In: `package main
import "encoding/xml"
func f() {
xml.Marshal(a, b)
xml.Unmarshal(a, b)
p1 := xml.NewParser(stream)
p1.Unmarshal(v, start)
var p2 xml.Parser
p2.Unmarshal(v, start)
}
`,
Out: `package main
import "encoding/xml"
func f() {
xml.NewEncoder(a).Encode(b)
xml.Unmarshal(a, b)
p1 := xml.NewDecoder(stream)
p1.DecodeElement(v, start)
var p2 xml.Decoder
p2.DecodeElement(v, start)
}
`,
},
}
...@@ -26,11 +26,7 @@ type Marshaler interface { ...@@ -26,11 +26,7 @@ type Marshaler interface {
MarshalXML() ([]byte, error) MarshalXML() ([]byte, error)
} }
type printer struct { // Marshal returns the XML encoding of v.
*bufio.Writer
}
// Marshal writes an XML-formatted representation of v to w.
// //
// If v implements Marshaler, then Marshal calls its MarshalXML method. // If v implements Marshaler, then Marshal calls its MarshalXML method.
// Otherwise, Marshal uses the following procedure to create the XML. // Otherwise, Marshal uses the following procedure to create the XML.
...@@ -76,7 +72,7 @@ type printer struct { ...@@ -76,7 +72,7 @@ type printer struct {
// Age int `xml:"person>age"` // Age int `xml:"person>age"`
// } // }
// //
// xml.Marshal(w, &Result{Id: 13, FirstName: "John", LastName: "Doe", Age: 42}) // xml.Marshal(&Result{Id: 13, FirstName: "John", LastName: "Doe", Age: 42})
// //
// would be marshalled as: // would be marshalled as:
// //
...@@ -91,13 +87,38 @@ type printer struct { ...@@ -91,13 +87,38 @@ type printer struct {
// </result> // </result>
// //
// Marshal will return an error if asked to marshal a channel, function, or map. // Marshal will return an error if asked to marshal a channel, function, or map.
func Marshal(w io.Writer, v interface{}) (err error) { func Marshal(v interface{}) ([]byte, error) {
p := &printer{bufio.NewWriter(w)} var b bytes.Buffer
err = p.marshalValue(reflect.ValueOf(v), nil) if err := NewEncoder(&b).Encode(v); err != nil {
p.Flush() return nil, err
}
return b.Bytes(), nil
}
// An Encoder writes XML data to an output stream.
type Encoder struct {
printer
}
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{printer{bufio.NewWriter(w)}}
}
// Encode writes the XML encoding of v to the stream.
//
// See the documentation for Marshal for details about the conversion
// of Go values to XML.
func (enc *Encoder) Encode(v interface{}) error {
err := enc.marshalValue(reflect.ValueOf(v), nil)
enc.Flush()
return err return err
} }
type printer struct {
*bufio.Writer
}
func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error { func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error {
if !val.IsValid() { if !val.IsValid() {
return nil return nil
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
package xml package xml
import ( import (
"bytes"
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
...@@ -619,13 +618,12 @@ func TestMarshal(t *testing.T) { ...@@ -619,13 +618,12 @@ func TestMarshal(t *testing.T) {
if test.UnmarshalOnly { if test.UnmarshalOnly {
continue continue
} }
buf := bytes.NewBuffer(nil) data, err := Marshal(test.Value)
err := Marshal(buf, test.Value)
if err != nil { if err != nil {
t.Errorf("#%d: Error: %s", idx, err) t.Errorf("#%d: Error: %s", idx, err)
continue continue
} }
if got, want := buf.String(), test.ExpectXML; got != want { if got, want := string(data), test.ExpectXML; got != want {
if strings.Contains(want, "\n") { if strings.Contains(want, "\n") {
t.Errorf("#%d: marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", idx, test.Value, got, want) t.Errorf("#%d: marshal(%#v):\nHAVE:\n%s\nWANT:\n%s", idx, test.Value, got, want)
} else { } else {
...@@ -666,8 +664,7 @@ var marshalErrorTests = []struct { ...@@ -666,8 +664,7 @@ var marshalErrorTests = []struct {
func TestMarshalErrors(t *testing.T) { func TestMarshalErrors(t *testing.T) {
for idx, test := range marshalErrorTests { for idx, test := range marshalErrorTests {
buf := bytes.NewBuffer(nil) _, err := Marshal(test.Value)
err := Marshal(buf, test.Value)
if err == nil || err.Error() != test.Err { if err == nil || err.Error() != test.Err {
t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err) t.Errorf("#%d: marshal(%#v) = [error] %v, want %v", idx, test.Value, err, test.Err)
} }
...@@ -691,8 +688,7 @@ func TestUnmarshal(t *testing.T) { ...@@ -691,8 +688,7 @@ func TestUnmarshal(t *testing.T) {
vt := reflect.TypeOf(test.Value) vt := reflect.TypeOf(test.Value)
dest := reflect.New(vt.Elem()).Interface() dest := reflect.New(vt.Elem()).Interface()
buffer := bytes.NewBufferString(test.ExpectXML) err := Unmarshal([]byte(test.ExpectXML), dest)
err := Unmarshal(buffer, dest)
switch fix := dest.(type) { switch fix := dest.(type) {
case *Feed: case *Feed:
...@@ -711,17 +707,14 @@ func TestUnmarshal(t *testing.T) { ...@@ -711,17 +707,14 @@ func TestUnmarshal(t *testing.T) {
} }
func BenchmarkMarshal(b *testing.B) { func BenchmarkMarshal(b *testing.B) {
buf := bytes.NewBuffer(nil)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Marshal(buf, atomValue) Marshal(atomValue)
buf.Truncate(0)
} }
} }
func BenchmarkUnmarshal(b *testing.B) { func BenchmarkUnmarshal(b *testing.B) {
xml := []byte(atomXml) xml := []byte(atomXml)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
buffer := bytes.NewBuffer(xml) Unmarshal(xml, &Feed{})
Unmarshal(buffer, &Feed{})
} }
} }
...@@ -7,7 +7,6 @@ package xml ...@@ -7,7 +7,6 @@ package xml
import ( import (
"bytes" "bytes"
"errors" "errors"
"io"
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
...@@ -20,10 +19,10 @@ import ( ...@@ -20,10 +19,10 @@ import (
// See package json for a textual representation more suitable // See package json for a textual representation more suitable
// to data structures. // to data structures.
// Unmarshal parses an XML element from r and uses the // Unmarshal parses the XML-encoded data and stores the result in
// reflect library to fill in an arbitrary struct, slice, or string // the value pointed to by v, which must be an arbitrary struct,
// pointed at by val. Well-formed data that does not fit // slice, or string. Well-formed data that does not fit into v is
// into val is discarded. // discarded.
// //
// For example, given these definitions: // For example, given these definitions:
// //
...@@ -59,7 +58,7 @@ import ( ...@@ -59,7 +58,7 @@ import (
// <address>123 Main Street</address> // <address>123 Main Street</address>
// </result> // </result>
// //
// via Unmarshal(r, &result) is equivalent to assigning // via Unmarshal(data, &result) is equivalent to assigning
// //
// r = Result{ // r = Result{
// xml.Name{Local: "result"}, // xml.Name{Local: "result"},
...@@ -157,18 +156,26 @@ import ( ...@@ -157,18 +156,26 @@ import (
// Unmarshal maps an XML element to a pointer by setting the pointer // Unmarshal maps an XML element to a pointer by setting the pointer
// to a freshly allocated value and then mapping the element to that value. // to a freshly allocated value and then mapping the element to that value.
// //
func Unmarshal(r io.Reader, val interface{}) error { func Unmarshal(data []byte, v interface{}) error {
v := reflect.ValueOf(val) return NewDecoder(bytes.NewBuffer(data)).Decode(v)
if v.Kind() != reflect.Ptr { }
// Decode works like xml.Unmarshal, except it reads the decoder
// stream to find the start element.
func (d *Decoder) Decode(v interface{}) error {
return d.DecodeElement(v, nil)
}
// DecodeElement works like xml.Unmarshal except that it takes
// a pointer to the start XML element to decode into v.
// It is useful when a client reads some raw XML tokens itself
// but also wants to defer to Unmarshal for some elements.
func (d *Decoder) DecodeElement(v interface{}, start *StartElement) error {
val := reflect.ValueOf(v)
if val.Kind() != reflect.Ptr {
return errors.New("non-pointer passed to Unmarshal") return errors.New("non-pointer passed to Unmarshal")
} }
p := NewParser(r) return d.unmarshal(val.Elem(), start)
elem := v.Elem()
err := p.unmarshal(elem, nil)
if err != nil {
return err
}
return nil
} }
// An UnmarshalError represents an error in the unmarshalling process. // An UnmarshalError represents an error in the unmarshalling process.
...@@ -176,22 +183,8 @@ type UnmarshalError string ...@@ -176,22 +183,8 @@ type UnmarshalError string
func (e UnmarshalError) Error() string { return string(e) } func (e UnmarshalError) Error() string { return string(e) }
// The Parser's Unmarshal method is like xml.Unmarshal
// except that it can be passed a pointer to the initial start element,
// useful when a client reads some raw XML tokens itself
// but also defers to Unmarshal for some elements.
// Passing a nil start element indicates that Unmarshal should
// read the token stream to find the start element.
func (p *Parser) Unmarshal(val interface{}, start *StartElement) error {
v := reflect.ValueOf(val)
if v.Kind() != reflect.Ptr {
return errors.New("non-pointer passed to Unmarshal")
}
return p.unmarshal(v.Elem(), start)
}
// Unmarshal a single XML element into val. // Unmarshal a single XML element into val.
func (p *Parser) unmarshal(val reflect.Value, start *StartElement) error { func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
// Find start element if we need it. // Find start element if we need it.
if start == nil { if start == nil {
for { for {
...@@ -484,9 +477,9 @@ func copyValue(dst reflect.Value, src []byte) (err error) { ...@@ -484,9 +477,9 @@ func copyValue(dst reflect.Value, src []byte) (err error) {
// unmarshalPath walks down an XML structure looking for wanted // unmarshalPath walks down an XML structure looking for wanted
// paths, and calls unmarshal on them. // paths, and calls unmarshal on them.
// The consumed result tells whether XML elements have been consumed // The consumed result tells whether XML elements have been consumed
// from the Parser until start's matching end element, or if it's // from the Decoder until start's matching end element, or if it's
// still untouched because start is uninteresting for sv's fields. // still untouched because start is uninteresting for sv's fields.
func (p *Parser) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) { func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start *StartElement) (consumed bool, err error) {
recurse := false recurse := false
Loop: Loop:
for i := range tinfo.fields { for i := range tinfo.fields {
...@@ -550,7 +543,7 @@ Loop: ...@@ -550,7 +543,7 @@ Loop:
// Read tokens until we find the end element. // Read tokens until we find the end element.
// Token is taking care of making sure the // Token is taking care of making sure the
// end element matches the start element we saw. // end element matches the start element we saw.
func (p *Parser) Skip() error { func (p *Decoder) Skip() error {
for { for {
tok, err := p.Token() tok, err := p.Token()
if err != nil { if err != nil {
......
...@@ -6,7 +6,6 @@ package xml ...@@ -6,7 +6,6 @@ package xml
import ( import (
"reflect" "reflect"
"strings"
"testing" "testing"
) )
...@@ -14,7 +13,7 @@ import ( ...@@ -14,7 +13,7 @@ import (
func TestUnmarshalFeed(t *testing.T) { func TestUnmarshalFeed(t *testing.T) {
var f Feed var f Feed
if err := Unmarshal(strings.NewReader(atomFeedString), &f); err != nil { if err := Unmarshal([]byte(atomFeedString), &f); err != nil {
t.Fatalf("Unmarshal: %s", err) t.Fatalf("Unmarshal: %s", err)
} }
if !reflect.DeepEqual(f, atomFeed) { if !reflect.DeepEqual(f, atomFeed) {
...@@ -281,7 +280,7 @@ var pathTests = []interface{}{ ...@@ -281,7 +280,7 @@ var pathTests = []interface{}{
func TestUnmarshalPaths(t *testing.T) { func TestUnmarshalPaths(t *testing.T) {
for _, pt := range pathTests { for _, pt := range pathTests {
v := reflect.New(reflect.TypeOf(pt).Elem()).Interface() v := reflect.New(reflect.TypeOf(pt).Elem()).Interface()
if err := Unmarshal(strings.NewReader(pathTestString), v); err != nil { if err := Unmarshal([]byte(pathTestString), v); err != nil {
t.Fatalf("Unmarshal: %s", err) t.Fatalf("Unmarshal: %s", err)
} }
if !reflect.DeepEqual(v, pt) { if !reflect.DeepEqual(v, pt) {
...@@ -331,7 +330,7 @@ var badPathTests = []struct { ...@@ -331,7 +330,7 @@ var badPathTests = []struct {
func TestUnmarshalBadPaths(t *testing.T) { func TestUnmarshalBadPaths(t *testing.T) {
for _, tt := range badPathTests { for _, tt := range badPathTests {
err := Unmarshal(strings.NewReader(pathTestString), tt.v) err := Unmarshal([]byte(pathTestString), tt.v)
if !reflect.DeepEqual(err, tt.e) { if !reflect.DeepEqual(err, tt.e) {
t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e) t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e)
} }
...@@ -350,7 +349,7 @@ type TestThree struct { ...@@ -350,7 +349,7 @@ type TestThree struct {
func TestUnmarshalWithoutNameType(t *testing.T) { func TestUnmarshalWithoutNameType(t *testing.T) {
var x TestThree var x TestThree
if err := Unmarshal(strings.NewReader(withoutNameTypeData), &x); err != nil { if err := Unmarshal([]byte(withoutNameTypeData), &x); err != nil {
t.Fatalf("Unmarshal: %s", err) t.Fatalf("Unmarshal: %s", err)
} }
if x.Attr != OK { if x.Attr != OK {
......
...@@ -36,7 +36,7 @@ func (e *SyntaxError) Error() string { ...@@ -36,7 +36,7 @@ func (e *SyntaxError) Error() string {
// A Name represents an XML name (Local) annotated // A Name represents an XML name (Local) annotated
// with a name space identifier (Space). // with a name space identifier (Space).
// In tokens returned by Parser.Token, the Space identifier // In tokens returned by Decoder.Token, the Space identifier
// is given as a canonical URL, not the short prefix used // is given as a canonical URL, not the short prefix used
// in the document being parsed. // in the document being parsed.
type Name struct { type Name struct {
...@@ -124,9 +124,9 @@ func CopyToken(t Token) Token { ...@@ -124,9 +124,9 @@ func CopyToken(t Token) Token {
return t return t
} }
// A Parser represents an XML parser reading a particular input stream. // A Decoder represents an XML parser reading a particular input stream.
// The parser assumes that its input is encoded in UTF-8. // The parser assumes that its input is encoded in UTF-8.
type Parser struct { type Decoder struct {
// Strict defaults to true, enforcing the requirements // Strict defaults to true, enforcing the requirements
// of the XML specification. // of the XML specification.
// If set to false, the parser allows input containing common // If set to false, the parser allows input containing common
...@@ -139,9 +139,9 @@ type Parser struct { ...@@ -139,9 +139,9 @@ type Parser struct {
// //
// Setting: // Setting:
// //
// p.Strict = false; // d.Strict = false;
// p.AutoClose = HTMLAutoClose; // d.AutoClose = HTMLAutoClose;
// p.Entity = HTMLEntity // d.Entity = HTMLEntity
// //
// creates a parser that can handle typical HTML. // creates a parser that can handle typical HTML.
Strict bool Strict bool
...@@ -184,16 +184,16 @@ type Parser struct { ...@@ -184,16 +184,16 @@ type Parser struct {
tmp [32]byte tmp [32]byte
} }
// NewParser creates a new XML parser reading from r. // NewDecoder creates a new XML parser reading from r.
func NewParser(r io.Reader) *Parser { func NewDecoder(r io.Reader) *Decoder {
p := &Parser{ d := &Decoder{
ns: make(map[string]string), ns: make(map[string]string),
nextByte: -1, nextByte: -1,
line: 1, line: 1,
Strict: true, Strict: true,
} }
p.switchToReader(r) d.switchToReader(r)
return p return d
} }
// Token returns the next XML token in the input stream. // Token returns the next XML token in the input stream.
...@@ -218,17 +218,17 @@ func NewParser(r io.Reader) *Parser { ...@@ -218,17 +218,17 @@ func NewParser(r io.Reader) *Parser {
// set to the URL identifying its name space when known. // set to the URL identifying its name space when known.
// If Token encounters an unrecognized name space prefix, // If Token encounters an unrecognized name space prefix,
// it uses the prefix as the Space rather than report an error. // it uses the prefix as the Space rather than report an error.
func (p *Parser) Token() (t Token, err error) { func (d *Decoder) Token() (t Token, err error) {
if p.nextToken != nil { if d.nextToken != nil {
t = p.nextToken t = d.nextToken
p.nextToken = nil d.nextToken = nil
} else if t, err = p.RawToken(); err != nil { } else if t, err = d.RawToken(); err != nil {
return return
} }
if !p.Strict { if !d.Strict {
if t1, ok := p.autoClose(t); ok { if t1, ok := d.autoClose(t); ok {
p.nextToken = t d.nextToken = t
t = t1 t = t1
} }
} }
...@@ -240,29 +240,29 @@ func (p *Parser) Token() (t Token, err error) { ...@@ -240,29 +240,29 @@ func (p *Parser) Token() (t Token, err error) {
// the translations first. // the translations first.
for _, a := range t1.Attr { for _, a := range t1.Attr {
if a.Name.Space == "xmlns" { if a.Name.Space == "xmlns" {
v, ok := p.ns[a.Name.Local] v, ok := d.ns[a.Name.Local]
p.pushNs(a.Name.Local, v, ok) d.pushNs(a.Name.Local, v, ok)
p.ns[a.Name.Local] = a.Value d.ns[a.Name.Local] = a.Value
} }
if a.Name.Space == "" && a.Name.Local == "xmlns" { if a.Name.Space == "" && a.Name.Local == "xmlns" {
// Default space for untagged names // Default space for untagged names
v, ok := p.ns[""] v, ok := d.ns[""]
p.pushNs("", v, ok) d.pushNs("", v, ok)
p.ns[""] = a.Value d.ns[""] = a.Value
} }
} }
p.translate(&t1.Name, true) d.translate(&t1.Name, true)
for i := range t1.Attr { for i := range t1.Attr {
p.translate(&t1.Attr[i].Name, false) d.translate(&t1.Attr[i].Name, false)
} }
p.pushElement(t1.Name) d.pushElement(t1.Name)
t = t1 t = t1
case EndElement: case EndElement:
p.translate(&t1.Name, true) d.translate(&t1.Name, true)
if !p.popElement(&t1) { if !d.popElement(&t1) {
return nil, p.err return nil, d.err
} }
t = t1 t = t1
} }
...@@ -272,7 +272,7 @@ func (p *Parser) Token() (t Token, err error) { ...@@ -272,7 +272,7 @@ func (p *Parser) Token() (t Token, err error) {
// Apply name space translation to name n. // Apply name space translation to name n.
// The default name space (for Space=="") // The default name space (for Space=="")
// applies only to element names, not to attribute names. // applies only to element names, not to attribute names.
func (p *Parser) translate(n *Name, isElementName bool) { func (d *Decoder) translate(n *Name, isElementName bool) {
switch { switch {
case n.Space == "xmlns": case n.Space == "xmlns":
return return
...@@ -281,20 +281,20 @@ func (p *Parser) translate(n *Name, isElementName bool) { ...@@ -281,20 +281,20 @@ func (p *Parser) translate(n *Name, isElementName bool) {
case n.Space == "" && n.Local == "xmlns": case n.Space == "" && n.Local == "xmlns":
return return
} }
if v, ok := p.ns[n.Space]; ok { if v, ok := d.ns[n.Space]; ok {
n.Space = v n.Space = v
} }
} }
func (p *Parser) switchToReader(r io.Reader) { func (d *Decoder) switchToReader(r io.Reader) {
// Get efficient byte at a time reader. // Get efficient byte at a time reader.
// Assume that if reader has its own // Assume that if reader has its own
// ReadByte, it's efficient enough. // ReadByte, it's efficient enough.
// Otherwise, use bufio. // Otherwise, use bufio.
if rb, ok := r.(io.ByteReader); ok { if rb, ok := r.(io.ByteReader); ok {
p.r = rb d.r = rb
} else { } else {
p.r = bufio.NewReader(r) d.r = bufio.NewReader(r)
} }
} }
...@@ -314,47 +314,47 @@ const ( ...@@ -314,47 +314,47 @@ const (
stkNs stkNs
) )
func (p *Parser) push(kind int) *stack { func (d *Decoder) push(kind int) *stack {
s := p.free s := d.free
if s != nil { if s != nil {
p.free = s.next d.free = s.next
} else { } else {
s = new(stack) s = new(stack)
} }
s.next = p.stk s.next = d.stk
s.kind = kind s.kind = kind
p.stk = s d.stk = s
return s return s
} }
func (p *Parser) pop() *stack { func (d *Decoder) pop() *stack {
s := p.stk s := d.stk
if s != nil { if s != nil {
p.stk = s.next d.stk = s.next
s.next = p.free s.next = d.free
p.free = s d.free = s
} }
return s return s
} }
// Record that we are starting an element with the given name. // Record that we are starting an element with the given name.
func (p *Parser) pushElement(name Name) { func (d *Decoder) pushElement(name Name) {
s := p.push(stkStart) s := d.push(stkStart)
s.name = name s.name = name
} }
// Record that we are changing the value of ns[local]. // Record that we are changing the value of ns[local].
// The old value is url, ok. // The old value is url, ok.
func (p *Parser) pushNs(local string, url string, ok bool) { func (d *Decoder) pushNs(local string, url string, ok bool) {
s := p.push(stkNs) s := d.push(stkNs)
s.name.Local = local s.name.Local = local
s.name.Space = url s.name.Space = url
s.ok = ok s.ok = ok
} }
// Creates a SyntaxError with the current line number. // Creates a SyntaxError with the current line number.
func (p *Parser) syntaxError(msg string) error { func (d *Decoder) syntaxError(msg string) error {
return &SyntaxError{Msg: msg, Line: p.line} return &SyntaxError{Msg: msg, Line: d.line}
} }
// Record that we are ending an element with the given name. // Record that we are ending an element with the given name.
...@@ -363,36 +363,36 @@ func (p *Parser) syntaxError(msg string) error { ...@@ -363,36 +363,36 @@ func (p *Parser) syntaxError(msg string) error {
// After popping the element, apply any undo records from // After popping the element, apply any undo records from
// the stack to restore the name translations that existed // the stack to restore the name translations that existed
// before we saw this element. // before we saw this element.
func (p *Parser) popElement(t *EndElement) bool { func (d *Decoder) popElement(t *EndElement) bool {
s := p.pop() s := d.pop()
name := t.Name name := t.Name
switch { switch {
case s == nil || s.kind != stkStart: case s == nil || s.kind != stkStart:
p.err = p.syntaxError("unexpected end element </" + name.Local + ">") d.err = d.syntaxError("unexpected end element </" + name.Local + ">")
return false return false
case s.name.Local != name.Local: case s.name.Local != name.Local:
if !p.Strict { if !d.Strict {
p.needClose = true d.needClose = true
p.toClose = t.Name d.toClose = t.Name
t.Name = s.name t.Name = s.name
return true return true
} }
p.err = p.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
return false return false
case s.name.Space != name.Space: case s.name.Space != name.Space:
p.err = p.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
"closed by </" + name.Local + "> in space " + name.Space) "closed by </" + name.Local + "> in space " + name.Space)
return false return false
} }
// Pop stack until a Start is on the top, undoing the // Pop stack until a Start is on the top, undoing the
// translations that were associated with the element we just closed. // translations that were associated with the element we just closed.
for p.stk != nil && p.stk.kind != stkStart { for d.stk != nil && d.stk.kind != stkStart {
s := p.pop() s := d.pop()
if s.ok { if s.ok {
p.ns[s.name.Local] = s.name.Space d.ns[s.name.Local] = s.name.Space
} else { } else {
delete(p.ns, s.name.Local) delete(d.ns, s.name.Local)
} }
} }
...@@ -401,17 +401,17 @@ func (p *Parser) popElement(t *EndElement) bool { ...@@ -401,17 +401,17 @@ func (p *Parser) popElement(t *EndElement) bool {
// If the top element on the stack is autoclosing and // If the top element on the stack is autoclosing and
// t is not the end tag, invent the end tag. // t is not the end tag, invent the end tag.
func (p *Parser) autoClose(t Token) (Token, bool) { func (d *Decoder) autoClose(t Token) (Token, bool) {
if p.stk == nil || p.stk.kind != stkStart { if d.stk == nil || d.stk.kind != stkStart {
return nil, false return nil, false
} }
name := strings.ToLower(p.stk.name.Local) name := strings.ToLower(d.stk.name.Local)
for _, s := range p.AutoClose { for _, s := range d.AutoClose {
if strings.ToLower(s) == name { if strings.ToLower(s) == name {
// This one should be auto closed if t doesn't close it. // This one should be auto closed if t doesn't close it.
et, ok := t.(EndElement) et, ok := t.(EndElement)
if !ok || et.Name.Local != name { if !ok || et.Name.Local != name {
return EndElement{p.stk.name}, true return EndElement{d.stk.name}, true
} }
break break
} }
...@@ -422,53 +422,53 @@ func (p *Parser) autoClose(t Token) (Token, bool) { ...@@ -422,53 +422,53 @@ func (p *Parser) autoClose(t Token) (Token, bool) {
// RawToken is like Token but does not verify that // RawToken is like Token but does not verify that
// start and end elements match and does not translate // start and end elements match and does not translate
// name space prefixes to their corresponding URLs. // name space prefixes to their corresponding URLs.
func (p *Parser) RawToken() (Token, error) { func (d *Decoder) RawToken() (Token, error) {
if p.err != nil { if d.err != nil {
return nil, p.err return nil, d.err
} }
if p.needClose { if d.needClose {
// The last element we read was self-closing and // The last element we read was self-closing and
// we returned just the StartElement half. // we returned just the StartElement half.
// Return the EndElement half now. // Return the EndElement half now.
p.needClose = false d.needClose = false
return EndElement{p.toClose}, nil return EndElement{d.toClose}, nil
} }
b, ok := p.getc() b, ok := d.getc()
if !ok { if !ok {
return nil, p.err return nil, d.err
} }
if b != '<' { if b != '<' {
// Text section. // Text section.
p.ungetc(b) d.ungetc(b)
data := p.text(-1, false) data := d.text(-1, false)
if data == nil { if data == nil {
return nil, p.err return nil, d.err
} }
return CharData(data), nil return CharData(data), nil
} }
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
switch b { switch b {
case '/': case '/':
// </: End element // </: End element
var name Name var name Name
if name, ok = p.nsname(); !ok { if name, ok = d.nsname(); !ok {
if p.err == nil { if d.err == nil {
p.err = p.syntaxError("expected element name after </") d.err = d.syntaxError("expected element name after </")
} }
return nil, p.err return nil, d.err
} }
p.space() d.space()
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b != '>' { if b != '>' {
p.err = p.syntaxError("invalid characters between </" + name.Local + " and >") d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
return nil, p.err return nil, d.err
} }
return EndElement{name}, nil return EndElement{name}, nil
...@@ -477,95 +477,95 @@ func (p *Parser) RawToken() (Token, error) { ...@@ -477,95 +477,95 @@ func (p *Parser) RawToken() (Token, error) {
// TODO(rsc): Should parse the <?xml declaration to make sure // TODO(rsc): Should parse the <?xml declaration to make sure
// the version is 1.0 and the encoding is UTF-8. // the version is 1.0 and the encoding is UTF-8.
var target string var target string
if target, ok = p.name(); !ok { if target, ok = d.name(); !ok {
if p.err == nil { if d.err == nil {
p.err = p.syntaxError("expected target name after <?") d.err = d.syntaxError("expected target name after <?")
} }
return nil, p.err return nil, d.err
} }
p.space() d.space()
p.buf.Reset() d.buf.Reset()
var b0 byte var b0 byte
for { for {
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
p.buf.WriteByte(b) d.buf.WriteByte(b)
if b0 == '?' && b == '>' { if b0 == '?' && b == '>' {
break break
} }
b0 = b b0 = b
} }
data := p.buf.Bytes() data := d.buf.Bytes()
data = data[0 : len(data)-2] // chop ?> data = data[0 : len(data)-2] // chop ?>
if target == "xml" { if target == "xml" {
enc := procInstEncoding(string(data)) enc := procInstEncoding(string(data))
if enc != "" && enc != "utf-8" && enc != "UTF-8" { if enc != "" && enc != "utf-8" && enc != "UTF-8" {
if p.CharsetReader == nil { if d.CharsetReader == nil {
p.err = fmt.Errorf("xml: encoding %q declared but Parser.CharsetReader is nil", enc) d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc)
return nil, p.err return nil, d.err
} }
newr, err := p.CharsetReader(enc, p.r.(io.Reader)) newr, err := d.CharsetReader(enc, d.r.(io.Reader))
if err != nil { if err != nil {
p.err = fmt.Errorf("xml: opening charset %q: %v", enc, err) d.err = fmt.Errorf("xml: opening charset %q: %v", enc, err)
return nil, p.err return nil, d.err
} }
if newr == nil { if newr == nil {
panic("CharsetReader returned a nil Reader for charset " + enc) panic("CharsetReader returned a nil Reader for charset " + enc)
} }
p.switchToReader(newr) d.switchToReader(newr)
} }
} }
return ProcInst{target, data}, nil return ProcInst{target, data}, nil
case '!': case '!':
// <!: Maybe comment, maybe CDATA. // <!: Maybe comment, maybe CDATA.
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
switch b { switch b {
case '-': // <!- case '-': // <!-
// Probably <!-- for a comment. // Probably <!-- for a comment.
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b != '-' { if b != '-' {
p.err = p.syntaxError("invalid sequence <!- not part of <!--") d.err = d.syntaxError("invalid sequence <!- not part of <!--")
return nil, p.err return nil, d.err
} }
// Look for terminator. // Look for terminator.
p.buf.Reset() d.buf.Reset()
var b0, b1 byte var b0, b1 byte
for { for {
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
p.buf.WriteByte(b) d.buf.WriteByte(b)
if b0 == '-' && b1 == '-' && b == '>' { if b0 == '-' && b1 == '-' && b == '>' {
break break
} }
b0, b1 = b1, b b0, b1 = b1, b
} }
data := p.buf.Bytes() data := d.buf.Bytes()
data = data[0 : len(data)-3] // chop --> data = data[0 : len(data)-3] // chop -->
return Comment(data), nil return Comment(data), nil
case '[': // <![ case '[': // <![
// Probably <![CDATA[. // Probably <![CDATA[.
for i := 0; i < 6; i++ { for i := 0; i < 6; i++ {
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b != "CDATA["[i] { if b != "CDATA["[i] {
p.err = p.syntaxError("invalid <![ sequence") d.err = d.syntaxError("invalid <![ sequence")
return nil, p.err return nil, d.err
} }
} }
// Have <![CDATA[. Read text until ]]>. // Have <![CDATA[. Read text until ]]>.
data := p.text(-1, true) data := d.text(-1, true)
if data == nil { if data == nil {
return nil, p.err return nil, d.err
} }
return CharData(data), nil return CharData(data), nil
} }
...@@ -573,18 +573,18 @@ func (p *Parser) RawToken() (Token, error) { ...@@ -573,18 +573,18 @@ func (p *Parser) RawToken() (Token, error) {
// Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc. // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc.
// We don't care, but accumulate for caller. Quoted angle // We don't care, but accumulate for caller. Quoted angle
// brackets do not count for nesting. // brackets do not count for nesting.
p.buf.Reset() d.buf.Reset()
p.buf.WriteByte(b) d.buf.WriteByte(b)
inquote := uint8(0) inquote := uint8(0)
depth := 0 depth := 0
for { for {
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if inquote == 0 && b == '>' && depth == 0 { if inquote == 0 && b == '>' && depth == 0 {
break break
} }
p.buf.WriteByte(b) d.buf.WriteByte(b)
switch { switch {
case b == inquote: case b == inquote:
inquote = 0 inquote = 0
...@@ -602,45 +602,45 @@ func (p *Parser) RawToken() (Token, error) { ...@@ -602,45 +602,45 @@ func (p *Parser) RawToken() (Token, error) {
depth++ depth++
} }
} }
return Directive(p.buf.Bytes()), nil return Directive(d.buf.Bytes()), nil
} }
// Must be an open element like <a href="foo"> // Must be an open element like <a href="foo">
p.ungetc(b) d.ungetc(b)
var ( var (
name Name name Name
empty bool empty bool
attr []Attr attr []Attr
) )
if name, ok = p.nsname(); !ok { if name, ok = d.nsname(); !ok {
if p.err == nil { if d.err == nil {
p.err = p.syntaxError("expected element name after <") d.err = d.syntaxError("expected element name after <")
} }
return nil, p.err return nil, d.err
} }
attr = make([]Attr, 0, 4) attr = make([]Attr, 0, 4)
for { for {
p.space() d.space()
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b == '/' { if b == '/' {
empty = true empty = true
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b != '>' { if b != '>' {
p.err = p.syntaxError("expected /> in element") d.err = d.syntaxError("expected /> in element")
return nil, p.err return nil, d.err
} }
break break
} }
if b == '>' { if b == '>' {
break break
} }
p.ungetc(b) d.ungetc(b)
n := len(attr) n := len(attr)
if n >= cap(attr) { if n >= cap(attr) {
...@@ -650,85 +650,85 @@ func (p *Parser) RawToken() (Token, error) { ...@@ -650,85 +650,85 @@ func (p *Parser) RawToken() (Token, error) {
} }
attr = attr[0 : n+1] attr = attr[0 : n+1]
a := &attr[n] a := &attr[n]
if a.Name, ok = p.nsname(); !ok { if a.Name, ok = d.nsname(); !ok {
if p.err == nil { if d.err == nil {
p.err = p.syntaxError("expected attribute name in element") d.err = d.syntaxError("expected attribute name in element")
} }
return nil, p.err return nil, d.err
} }
p.space() d.space()
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return nil, p.err return nil, d.err
} }
if b != '=' { if b != '=' {
if p.Strict { if d.Strict {
p.err = p.syntaxError("attribute name without = in element") d.err = d.syntaxError("attribute name without = in element")
return nil, p.err return nil, d.err
} else { } else {
p.ungetc(b) d.ungetc(b)
a.Value = a.Name.Local a.Value = a.Name.Local
} }
} else { } else {
p.space() d.space()
data := p.attrval() data := d.attrval()
if data == nil { if data == nil {
return nil, p.err return nil, d.err
} }
a.Value = string(data) a.Value = string(data)
} }
} }
if empty { if empty {
p.needClose = true d.needClose = true
p.toClose = name d.toClose = name
} }
return StartElement{name, attr}, nil return StartElement{name, attr}, nil
} }
func (p *Parser) attrval() []byte { func (d *Decoder) attrval() []byte {
b, ok := p.mustgetc() b, ok := d.mustgetc()
if !ok { if !ok {
return nil return nil
} }
// Handle quoted attribute values // Handle quoted attribute values
if b == '"' || b == '\'' { if b == '"' || b == '\'' {
return p.text(int(b), false) return d.text(int(b), false)
} }
// Handle unquoted attribute values for strict parsers // Handle unquoted attribute values for strict parsers
if p.Strict { if d.Strict {
p.err = p.syntaxError("unquoted or missing attribute value in element") d.err = d.syntaxError("unquoted or missing attribute value in element")
return nil return nil
} }
// Handle unquoted attribute values for unstrict parsers // Handle unquoted attribute values for unstrict parsers
p.ungetc(b) d.ungetc(b)
p.buf.Reset() d.buf.Reset()
for { for {
b, ok = p.mustgetc() b, ok = d.mustgetc()
if !ok { if !ok {
return nil return nil
} }
// http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2 // http://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2
if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' || if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
'0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' { '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
p.buf.WriteByte(b) d.buf.WriteByte(b)
} else { } else {
p.ungetc(b) d.ungetc(b)
break break
} }
} }
return p.buf.Bytes() return d.buf.Bytes()
} }
// Skip spaces if any // Skip spaces if any
func (p *Parser) space() { func (d *Decoder) space() {
for { for {
b, ok := p.getc() b, ok := d.getc()
if !ok { if !ok {
return return
} }
switch b { switch b {
case ' ', '\r', '\n', '\t': case ' ', '\r', '\n', '\t':
default: default:
p.ungetc(b) d.ungetc(b)
return return
} }
} }
...@@ -736,35 +736,35 @@ func (p *Parser) space() { ...@@ -736,35 +736,35 @@ func (p *Parser) space() {
// Read a single byte. // Read a single byte.
// If there is no byte to read, return ok==false // If there is no byte to read, return ok==false
// and leave the error in p.err. // and leave the error in d.err.
// Maintain line number. // Maintain line number.
func (p *Parser) getc() (b byte, ok bool) { func (d *Decoder) getc() (b byte, ok bool) {
if p.err != nil { if d.err != nil {
return 0, false return 0, false
} }
if p.nextByte >= 0 { if d.nextByte >= 0 {
b = byte(p.nextByte) b = byte(d.nextByte)
p.nextByte = -1 d.nextByte = -1
} else { } else {
b, p.err = p.r.ReadByte() b, d.err = d.r.ReadByte()
if p.err != nil { if d.err != nil {
return 0, false return 0, false
} }
if p.saved != nil { if d.saved != nil {
p.saved.WriteByte(b) d.saved.WriteByte(b)
} }
} }
if b == '\n' { if b == '\n' {
p.line++ d.line++
} }
return b, true return b, true
} }
// Return saved offset. // Return saved offset.
// If we did ungetc (nextByte >= 0), have to back up one. // If we did ungetc (nextByte >= 0), have to back up one.
func (p *Parser) savedOffset() int { func (d *Decoder) savedOffset() int {
n := p.saved.Len() n := d.saved.Len()
if p.nextByte >= 0 { if d.nextByte >= 0 {
n-- n--
} }
return n return n
...@@ -772,23 +772,23 @@ func (p *Parser) savedOffset() int { ...@@ -772,23 +772,23 @@ func (p *Parser) savedOffset() int {
// Must read a single byte. // Must read a single byte.
// If there is no byte to read, // If there is no byte to read,
// set p.err to SyntaxError("unexpected EOF") // set d.err to SyntaxError("unexpected EOF")
// and return ok==false // and return ok==false
func (p *Parser) mustgetc() (b byte, ok bool) { func (d *Decoder) mustgetc() (b byte, ok bool) {
if b, ok = p.getc(); !ok { if b, ok = d.getc(); !ok {
if p.err == io.EOF { if d.err == io.EOF {
p.err = p.syntaxError("unexpected EOF") d.err = d.syntaxError("unexpected EOF")
} }
} }
return return
} }
// Unread a single byte. // Unread a single byte.
func (p *Parser) ungetc(b byte) { func (d *Decoder) ungetc(b byte) {
if b == '\n' { if b == '\n' {
p.line-- d.line--
} }
p.nextByte = int(b) d.nextByte = int(b)
} }
var entity = map[string]int{ var entity = map[string]int{
...@@ -802,18 +802,18 @@ var entity = map[string]int{ ...@@ -802,18 +802,18 @@ var entity = map[string]int{
// Read plain text section (XML calls it character data). // Read plain text section (XML calls it character data).
// If quote >= 0, we are in a quoted string and need to find the matching quote. // If quote >= 0, we are in a quoted string and need to find the matching quote.
// If cdata == true, we are in a <![CDATA[ section and need to find ]]>. // If cdata == true, we are in a <![CDATA[ section and need to find ]]>.
// On failure return nil and leave the error in p.err. // On failure return nil and leave the error in d.err.
func (p *Parser) text(quote int, cdata bool) []byte { func (d *Decoder) text(quote int, cdata bool) []byte {
var b0, b1 byte var b0, b1 byte
var trunc int var trunc int
p.buf.Reset() d.buf.Reset()
Input: Input:
for { for {
b, ok := p.getc() b, ok := d.getc()
if !ok { if !ok {
if cdata { if cdata {
if p.err == io.EOF { if d.err == io.EOF {
p.err = p.syntaxError("unexpected EOF in CDATA section") d.err = d.syntaxError("unexpected EOF in CDATA section")
} }
return nil return nil
} }
...@@ -827,17 +827,17 @@ Input: ...@@ -827,17 +827,17 @@ Input:
trunc = 2 trunc = 2
break Input break Input
} }
p.err = p.syntaxError("unescaped ]]> not in CDATA section") d.err = d.syntaxError("unescaped ]]> not in CDATA section")
return nil return nil
} }
// Stop reading text if we see a <. // Stop reading text if we see a <.
if b == '<' && !cdata { if b == '<' && !cdata {
if quote >= 0 { if quote >= 0 {
p.err = p.syntaxError("unescaped < inside quoted string") d.err = d.syntaxError("unescaped < inside quoted string")
return nil return nil
} }
p.ungetc('<') d.ungetc('<')
break Input break Input
} }
if quote >= 0 && b == byte(quote) { if quote >= 0 && b == byte(quote) {
...@@ -850,16 +850,16 @@ Input: ...@@ -850,16 +850,16 @@ Input:
// Parsers are required to recognize lt, gt, amp, apos, and quot // Parsers are required to recognize lt, gt, amp, apos, and quot
// even if they have not been declared. That's all we allow. // even if they have not been declared. That's all we allow.
var i int var i int
for i = 0; i < len(p.tmp); i++ { for i = 0; i < len(d.tmp); i++ {
var ok bool var ok bool
p.tmp[i], ok = p.getc() d.tmp[i], ok = d.getc()
if !ok { if !ok {
if p.err == io.EOF { if d.err == io.EOF {
p.err = p.syntaxError("unexpected EOF") d.err = d.syntaxError("unexpected EOF")
} }
return nil return nil
} }
c := p.tmp[i] c := d.tmp[i]
if c == ';' { if c == ';' {
break break
} }
...@@ -869,18 +869,18 @@ Input: ...@@ -869,18 +869,18 @@ Input:
c == '_' || c == '#' { c == '_' || c == '#' {
continue continue
} }
p.ungetc(c) d.ungetc(c)
break break
} }
s := string(p.tmp[0:i]) s := string(d.tmp[0:i])
if i >= len(p.tmp) { if i >= len(d.tmp) {
if !p.Strict { if !d.Strict {
b0, b1 = 0, 0 b0, b1 = 0, 0
p.buf.WriteByte('&') d.buf.WriteByte('&')
p.buf.Write(p.tmp[0:i]) d.buf.Write(d.tmp[0:i])
continue Input continue Input
} }
p.err = p.syntaxError("character entity expression &" + s + "... too long") d.err = d.syntaxError("character entity expression &" + s + "... too long")
return nil return nil
} }
var haveText bool var haveText bool
...@@ -901,28 +901,28 @@ Input: ...@@ -901,28 +901,28 @@ Input:
if r, ok := entity[s]; ok { if r, ok := entity[s]; ok {
text = string(r) text = string(r)
haveText = true haveText = true
} else if p.Entity != nil { } else if d.Entity != nil {
text, haveText = p.Entity[s] text, haveText = d.Entity[s]
} }
} }
if !haveText { if !haveText {
if !p.Strict { if !d.Strict {
b0, b1 = 0, 0 b0, b1 = 0, 0
p.buf.WriteByte('&') d.buf.WriteByte('&')
p.buf.Write(p.tmp[0:i]) d.buf.Write(d.tmp[0:i])
continue Input continue Input
} }
p.err = p.syntaxError("invalid character entity &" + s + ";") d.err = d.syntaxError("invalid character entity &" + s + ";")
return nil return nil
} }
p.buf.Write([]byte(text)) d.buf.Write([]byte(text))
b0, b1 = 0, 0 b0, b1 = 0, 0
continue Input continue Input
} }
p.buf.WriteByte(b) d.buf.WriteByte(b)
b0, b1 = b1, b b0, b1 = b1, b
} }
data := p.buf.Bytes() data := d.buf.Bytes()
data = data[0 : len(data)-trunc] data = data[0 : len(data)-trunc]
// Inspect each rune for being a disallowed character. // Inspect each rune for being a disallowed character.
...@@ -930,12 +930,12 @@ Input: ...@@ -930,12 +930,12 @@ Input:
for len(buf) > 0 { for len(buf) > 0 {
r, size := utf8.DecodeRune(buf) r, size := utf8.DecodeRune(buf)
if r == utf8.RuneError && size == 1 { if r == utf8.RuneError && size == 1 {
p.err = p.syntaxError("invalid UTF-8") d.err = d.syntaxError("invalid UTF-8")
return nil return nil
} }
buf = buf[size:] buf = buf[size:]
if !isInCharacterRange(r) { if !isInCharacterRange(r) {
p.err = p.syntaxError(fmt.Sprintf("illegal character code %U", r)) d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r))
return nil return nil
} }
} }
...@@ -970,8 +970,8 @@ func isInCharacterRange(r rune) (inrange bool) { ...@@ -970,8 +970,8 @@ func isInCharacterRange(r rune) (inrange bool) {
// Get name space name: name with a : stuck in the middle. // Get name space name: name with a : stuck in the middle.
// The part before the : is the name space identifier. // The part before the : is the name space identifier.
func (p *Parser) nsname() (name Name, ok bool) { func (d *Decoder) nsname() (name Name, ok bool) {
s, ok := p.name() s, ok := d.name()
if !ok { if !ok {
return return
} }
...@@ -986,37 +986,37 @@ func (p *Parser) nsname() (name Name, ok bool) { ...@@ -986,37 +986,37 @@ func (p *Parser) nsname() (name Name, ok bool) {
} }
// Get name: /first(first|second)*/ // Get name: /first(first|second)*/
// Do not set p.err if the name is missing (unless unexpected EOF is received): // Do not set d.err if the name is missing (unless unexpected EOF is received):
// let the caller provide better context. // let the caller provide better context.
func (p *Parser) name() (s string, ok bool) { func (d *Decoder) name() (s string, ok bool) {
var b byte var b byte
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return return
} }
// As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]* // As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]*
if b < utf8.RuneSelf && !isNameByte(b) { if b < utf8.RuneSelf && !isNameByte(b) {
p.ungetc(b) d.ungetc(b)
return "", false return "", false
} }
p.buf.Reset() d.buf.Reset()
p.buf.WriteByte(b) d.buf.WriteByte(b)
for { for {
if b, ok = p.mustgetc(); !ok { if b, ok = d.mustgetc(); !ok {
return return
} }
if b < utf8.RuneSelf && !isNameByte(b) { if b < utf8.RuneSelf && !isNameByte(b) {
p.ungetc(b) d.ungetc(b)
break break
} }
p.buf.WriteByte(b) d.buf.WriteByte(b)
} }
// Then we check the characters. // Then we check the characters.
s = p.buf.String() s = d.buf.String()
for i, c := range s { for i, c := range s {
if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) { if !unicode.Is(first, c) && (i == 0 || !unicode.Is(second, c)) {
p.err = p.syntaxError("invalid XML name: " + s) d.err = d.syntaxError("invalid XML name: " + s)
return "", false return "", false
} }
} }
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
package xml package xml
import ( import (
"bytes"
"io" "io"
"reflect" "reflect"
"strings" "strings"
...@@ -155,8 +154,8 @@ var xmlInput = []string{ ...@@ -155,8 +154,8 @@ var xmlInput = []string{
} }
func TestRawToken(t *testing.T) { func TestRawToken(t *testing.T) {
p := NewParser(strings.NewReader(testInput)) d := NewDecoder(strings.NewReader(testInput))
testRawToken(t, p, rawTokens) testRawToken(t, d, rawTokens)
} }
type downCaser struct { type downCaser struct {
...@@ -179,27 +178,27 @@ func (d *downCaser) Read(p []byte) (int, error) { ...@@ -179,27 +178,27 @@ func (d *downCaser) Read(p []byte) (int, error) {
func TestRawTokenAltEncoding(t *testing.T) { func TestRawTokenAltEncoding(t *testing.T) {
sawEncoding := "" sawEncoding := ""
p := NewParser(strings.NewReader(testInputAltEncoding)) d := NewDecoder(strings.NewReader(testInputAltEncoding))
p.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
sawEncoding = charset sawEncoding = charset
if charset != "x-testing-uppercase" { if charset != "x-testing-uppercase" {
t.Fatalf("unexpected charset %q", charset) t.Fatalf("unexpected charset %q", charset)
} }
return &downCaser{t, input.(io.ByteReader)}, nil return &downCaser{t, input.(io.ByteReader)}, nil
} }
testRawToken(t, p, rawTokensAltEncoding) testRawToken(t, d, rawTokensAltEncoding)
} }
func TestRawTokenAltEncodingNoConverter(t *testing.T) { func TestRawTokenAltEncodingNoConverter(t *testing.T) {
p := NewParser(strings.NewReader(testInputAltEncoding)) d := NewDecoder(strings.NewReader(testInputAltEncoding))
token, err := p.RawToken() token, err := d.RawToken()
if token == nil { if token == nil {
t.Fatalf("expected a token on first RawToken call") t.Fatalf("expected a token on first RawToken call")
} }
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
token, err = p.RawToken() token, err = d.RawToken()
if token != nil { if token != nil {
t.Errorf("expected a nil token; got %#v", token) t.Errorf("expected a nil token; got %#v", token)
} }
...@@ -213,9 +212,9 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) { ...@@ -213,9 +212,9 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) {
} }
} }
func testRawToken(t *testing.T, p *Parser, rawTokens []Token) { func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
for i, want := range rawTokens { for i, want := range rawTokens {
have, err := p.RawToken() have, err := d.RawToken()
if err != nil { if err != nil {
t.Fatalf("token %d: unexpected error: %s", i, err) t.Fatalf("token %d: unexpected error: %s", i, err)
} }
...@@ -258,10 +257,10 @@ var nestedDirectivesTokens = []Token{ ...@@ -258,10 +257,10 @@ var nestedDirectivesTokens = []Token{
} }
func TestNestedDirectives(t *testing.T) { func TestNestedDirectives(t *testing.T) {
p := NewParser(strings.NewReader(nestedDirectivesInput)) d := NewDecoder(strings.NewReader(nestedDirectivesInput))
for i, want := range nestedDirectivesTokens { for i, want := range nestedDirectivesTokens {
have, err := p.Token() have, err := d.Token()
if err != nil { if err != nil {
t.Fatalf("token %d: unexpected error: %s", i, err) t.Fatalf("token %d: unexpected error: %s", i, err)
} }
...@@ -272,10 +271,10 @@ func TestNestedDirectives(t *testing.T) { ...@@ -272,10 +271,10 @@ func TestNestedDirectives(t *testing.T) {
} }
func TestToken(t *testing.T) { func TestToken(t *testing.T) {
p := NewParser(strings.NewReader(testInput)) d := NewDecoder(strings.NewReader(testInput))
for i, want := range cookedTokens { for i, want := range cookedTokens {
have, err := p.Token() have, err := d.Token()
if err != nil { if err != nil {
t.Fatalf("token %d: unexpected error: %s", i, err) t.Fatalf("token %d: unexpected error: %s", i, err)
} }
...@@ -287,9 +286,9 @@ func TestToken(t *testing.T) { ...@@ -287,9 +286,9 @@ func TestToken(t *testing.T) {
func TestSyntax(t *testing.T) { func TestSyntax(t *testing.T) {
for i := range xmlInput { for i := range xmlInput {
p := NewParser(strings.NewReader(xmlInput[i])) d := NewDecoder(strings.NewReader(xmlInput[i]))
var err error var err error
for _, err = p.Token(); err == nil; _, err = p.Token() { for _, err = d.Token(); err == nil; _, err = d.Token() {
} }
if _, ok := err.(*SyntaxError); !ok { if _, ok := err.(*SyntaxError); !ok {
t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i]) t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
...@@ -368,8 +367,7 @@ const testScalarsInput = `<allscalars> ...@@ -368,8 +367,7 @@ const testScalarsInput = `<allscalars>
func TestAllScalars(t *testing.T) { func TestAllScalars(t *testing.T) {
var a allScalars var a allScalars
buf := bytes.NewBufferString(testScalarsInput) err := Unmarshal([]byte(testScalarsInput), &a)
err := Unmarshal(buf, &a)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
...@@ -386,8 +384,7 @@ type item struct { ...@@ -386,8 +384,7 @@ type item struct {
func TestIssue569(t *testing.T) { func TestIssue569(t *testing.T) {
data := `<item><Field_a>abcd</Field_a></item>` data := `<item><Field_a>abcd</Field_a></item>`
var i item var i item
buf := bytes.NewBufferString(data) err := Unmarshal([]byte(data), &i)
err := Unmarshal(buf, &i)
if err != nil || i.Field_a != "abcd" { if err != nil || i.Field_a != "abcd" {
t.Fatal("Expecting abcd") t.Fatal("Expecting abcd")
...@@ -396,9 +393,9 @@ func TestIssue569(t *testing.T) { ...@@ -396,9 +393,9 @@ func TestIssue569(t *testing.T) {
func TestUnquotedAttrs(t *testing.T) { func TestUnquotedAttrs(t *testing.T) {
data := "<tag attr=azAZ09:-_\t>" data := "<tag attr=azAZ09:-_\t>"
p := NewParser(strings.NewReader(data)) d := NewDecoder(strings.NewReader(data))
p.Strict = false d.Strict = false
token, err := p.Token() token, err := d.Token()
if _, ok := err.(*SyntaxError); ok { if _, ok := err.(*SyntaxError); ok {
t.Errorf("Unexpected error: %v", err) t.Errorf("Unexpected error: %v", err)
} }
...@@ -422,9 +419,9 @@ func TestValuelessAttrs(t *testing.T) { ...@@ -422,9 +419,9 @@ func TestValuelessAttrs(t *testing.T) {
{"<input checked />", "input", "checked"}, {"<input checked />", "input", "checked"},
} }
for _, test := range tests { for _, test := range tests {
p := NewParser(strings.NewReader(test[0])) d := NewDecoder(strings.NewReader(test[0]))
p.Strict = false d.Strict = false
token, err := p.Token() token, err := d.Token()
if _, ok := err.(*SyntaxError); ok { if _, ok := err.(*SyntaxError); ok {
t.Errorf("Unexpected error: %v", err) t.Errorf("Unexpected error: %v", err)
} }
...@@ -472,9 +469,9 @@ func TestCopyTokenStartElement(t *testing.T) { ...@@ -472,9 +469,9 @@ func TestCopyTokenStartElement(t *testing.T) {
func TestSyntaxErrorLineNum(t *testing.T) { func TestSyntaxErrorLineNum(t *testing.T) {
testInput := "<P>Foo<P>\n\n<P>Bar</>\n" testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
p := NewParser(strings.NewReader(testInput)) d := NewDecoder(strings.NewReader(testInput))
var err error var err error
for _, err = p.Token(); err == nil; _, err = p.Token() { for _, err = d.Token(); err == nil; _, err = d.Token() {
} }
synerr, ok := err.(*SyntaxError) synerr, ok := err.(*SyntaxError)
if !ok { if !ok {
...@@ -487,41 +484,41 @@ func TestSyntaxErrorLineNum(t *testing.T) { ...@@ -487,41 +484,41 @@ func TestSyntaxErrorLineNum(t *testing.T) {
func TestTrailingRawToken(t *testing.T) { func TestTrailingRawToken(t *testing.T) {
input := `<FOO></FOO> ` input := `<FOO></FOO> `
p := NewParser(strings.NewReader(input)) d := NewDecoder(strings.NewReader(input))
var err error var err error
for _, err = p.RawToken(); err == nil; _, err = p.RawToken() { for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
} }
if err != io.EOF { if err != io.EOF {
t.Fatalf("p.RawToken() = _, %v, want _, io.EOF", err) t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
} }
} }
func TestTrailingToken(t *testing.T) { func TestTrailingToken(t *testing.T) {
input := `<FOO></FOO> ` input := `<FOO></FOO> `
p := NewParser(strings.NewReader(input)) d := NewDecoder(strings.NewReader(input))
var err error var err error
for _, err = p.Token(); err == nil; _, err = p.Token() { for _, err = d.Token(); err == nil; _, err = d.Token() {
} }
if err != io.EOF { if err != io.EOF {
t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
} }
} }
func TestEntityInsideCDATA(t *testing.T) { func TestEntityInsideCDATA(t *testing.T) {
input := `<test><![CDATA[ &val=foo ]]></test>` input := `<test><![CDATA[ &val=foo ]]></test>`
p := NewParser(strings.NewReader(input)) d := NewDecoder(strings.NewReader(input))
var err error var err error
for _, err = p.Token(); err == nil; _, err = p.Token() { for _, err = d.Token(); err == nil; _, err = d.Token() {
} }
if err != io.EOF { if err != io.EOF {
t.Fatalf("p.Token() = _, %v, want _, io.EOF", err) t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
} }
} }
// The last three tests (respectively one for characters in attribute // The last three tests (respectively one for characters in attribute
// names and two for character entities) pass not because of code // names and two for character entities) pass not because of code
// changed for issue 1259, but instead pass with the given messages // changed for issue 1259, but instead pass with the given messages
// from other parts of xml.Parser. I provide these to note the // from other parts of xml.Decoder. I provide these to note the
// current behavior of situations where one might think that character // current behavior of situations where one might think that character
// range checking would detect the error, but it does not in fact. // range checking would detect the error, but it does not in fact.
...@@ -541,15 +538,15 @@ var characterTests = []struct { ...@@ -541,15 +538,15 @@ var characterTests = []struct {
func TestDisallowedCharacters(t *testing.T) { func TestDisallowedCharacters(t *testing.T) {
for i, tt := range characterTests { for i, tt := range characterTests {
p := NewParser(strings.NewReader(tt.in)) d := NewDecoder(strings.NewReader(tt.in))
var err error var err error
for err == nil { for err == nil {
_, err = p.Token() _, err = d.Token()
} }
synerr, ok := err.(*SyntaxError) synerr, ok := err.(*SyntaxError)
if !ok { if !ok {
t.Fatalf("input %d p.Token() = _, %v, want _, *SyntaxError", i, err) t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
} }
if synerr.Msg != tt.err { if synerr.Msg != tt.err {
t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg) t.Fatalf("input %d synerr.Msg wrong: want '%s', got '%s'", i, tt.err, synerr.Msg)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment