Commit 1627b46e authored by Gustavo Niemeyer

xml: major Go 1 fixup

This CL improves the xml package in the following ways:

- makes its interface match established conventions
- brings Marshal and Unmarshal closer together
- fixes a large number of bugs and adds tests
- improves speed significantly
- organizes and simplifies the code

Fixes #2426.
Fixes #2406.
Fixes #1989.

What follows is a detailed list of those changes.

- All matching is case sensitive without special processing
  to the field name or xml tag in an attempt to match them.
  Customize the field tag as desired to match the correct XML
  elements.

- Flags are ",flag" rather than "flag". The names "attr",
  "chardata", etc, may be used to name actual XML elements.

- Overriding of attribute names is possible with "name,attr".

- Attribute fields are marshalled properly if they have
  non-string types. Previously they were unmarshalled, but were
  ignored at marshalling time.

- Comment fields tagged with ",comment" are marshalled properly,
  rather than being marshalled as normal fields.

- The handling of the Any field has been replaced by the ",any"
  flag to avoid unexpected results when using the field name for
  other purposes, and has also been fixed to interact properly
  with name paths. Previously the feature would not function
  if any field in the type had a name path in its tag.

- Embedded struct support fixed and cleaned so it works when
  marshalling and also when using field paths deeper than one level.

- Conflict reporting on field names has been expanded to cover
  all fields. Previously it'd catch only conflicts of paths
  deeper than one level. Also interacts correctly with embedded
  structs now.

- A trailing '>' is disallowed in xml tags. It used to be
  supported for removing the ambiguity between "attr" and "attr>",
  but the marshalling support for that was broken, and it's now
  unnecessary. Use "name" instead of "name>".

- Fixed docs to point out that an XMLName doesn't have to be
  an xml.Name (e.g. a struct{} is a good fit too). The code was
  already working like that.

- Fixed asymmetry in the precedence of XML element names between
  marshalling and unmarshalling. Marshal would consider the XMLName
  of the field type before the field tag, while unmarshalling would
  do the opposite. Now both respect the tag of the XMLName field
  first, and a nice error message is provided in case an attempt
  is made to name a field with its tag in a way that would
  conflict with the underlying type's XMLName field.

- Do not marshal broken "<???>" tags when in doubt. Use the type
  name, and error out if that's not possible.

- Do not break down unmarshalling if there's an interface{} field
  in a struct.

- Significant speed boost due to caching of type metadata and
  overall allocation clean ups. The following timings reflect
  processing of the atom test data:

  Old:

  BenchmarkMarshal           50000             48798 ns/op
  BenchmarkUnmarshal          5000            357174 ns/op

  New:

  BenchmarkMarshal          100000             19799 ns/op
  BenchmarkUnmarshal         10000            128525 ns/op

R=cw, gustavo, kevlar, adg, rogpeppe, fullung, christoph, rsc
CC=golang-dev
https://golang.org/cl/5503078
parent 45ca908f
...@@ -9,6 +9,7 @@ TARG=encoding/xml ...@@ -9,6 +9,7 @@ TARG=encoding/xml
GOFILES=\ GOFILES=\
marshal.go\ marshal.go\
read.go\ read.go\
typeinfo.go\
xml.go\ xml.go\
include ../../../Make.pkg include ../../../Make.pkg
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
package xml package xml
var atomValue = &Feed{ var atomValue = &Feed{
XMLName: Name{"http://www.w3.org/2005/Atom", "feed"},
Title: "Example Feed", Title: "Example Feed",
Link: []Link{{Href: "http://example.org/"}}, Link: []Link{{Href: "http://example.org/"}},
Updated: ParseTime("2003-12-13T18:30:02Z"), Updated: ParseTime("2003-12-13T18:30:02Z"),
...@@ -24,19 +25,19 @@ var atomValue = &Feed{ ...@@ -24,19 +25,19 @@ var atomValue = &Feed{
var atomXml = `` + var atomXml = `` +
`<feed xmlns="http://www.w3.org/2005/Atom">` + `<feed xmlns="http://www.w3.org/2005/Atom">` +
`<Title>Example Feed</Title>` + `<title>Example Feed</title>` +
`<Id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</Id>` + `<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>` +
`<Link href="http://example.org/"></Link>` + `<link href="http://example.org/"></link>` +
`<Updated>2003-12-13T18:30:02Z</Updated>` + `<updated>2003-12-13T18:30:02Z</updated>` +
`<Author><Name>John Doe</Name><URI></URI><Email></Email></Author>` + `<author><name>John Doe</name><uri></uri><email></email></author>` +
`<Entry>` + `<entry>` +
`<Title>Atom-Powered Robots Run Amok</Title>` + `<title>Atom-Powered Robots Run Amok</title>` +
`<Id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</Id>` + `<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>` +
`<Link href="http://example.org/2003/12/13/atom03"></Link>` + `<link href="http://example.org/2003/12/13/atom03"></link>` +
`<Updated>2003-12-13T18:30:02Z</Updated>` + `<updated>2003-12-13T18:30:02Z</updated>` +
`<Author><Name></Name><URI></URI><Email></Email></Author>` + `<author><name></name><uri></uri><email></email></author>` +
`<Summary>Some text.</Summary>` + `<summary>Some text.</summary>` +
`</Entry>` + `</entry>` +
`</feed>` `</feed>`
func ParseTime(str string) Time { func ParseTime(str string) Time {
......
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xml
import (
"strings"
"testing"
)
// C holds the fields shared by A and B, both of which embed it.
// TestEmbedded1 checks that these fields are filled in through the
// embedding struct.
type C struct {
Name string
Open bool
}
// A is the root value decoded by TestEmbedded1. It embeds C, contains
// a nested B, and declares one field of its own.
type A struct {
// The tag holds a namespace and a local name separated by a space.
XMLName Name `xml:"http://domain a"`
C
B B
FieldA string
}
// B is the nested value inside A. Like A it embeds C, so the test can
// check embedded fields one level below the root as well.
type B struct {
XMLName Name `xml:"b"`
C
FieldB string
}
// _1a is the input document for TestEmbedded1: an <a> element carrying
// the fields of C and A, with a nested <b> element carrying the fields
// of C and B.
const _1a = `
<?xml version="1.0" encoding="UTF-8"?>
<a xmlns="http://domain">
<name>KmlFile</name>
<open>1</open>
<b>
<name>Absolute</name>
<open>0</open>
<fieldb>bar</fieldb>
</b>
<fielda>foo</fielda>
</a>
`
// Tests that fields reached through embedded structs are populated by
// Unmarshal, both on the root element and on the nested <b> element.
func TestEmbedded1(t *testing.T) {
	var doc A
	err := Unmarshal(strings.NewReader(_1a), &doc)
	if err != nil {
		t.Fatalf("Unmarshal: %s", err)
	}

	// Root element: own field first, then the fields promoted from C.
	if got := doc.FieldA; got != "foo" {
		t.Fatalf("Unmarshal: expected 'foo' but found '%s'", got)
	}
	if got := doc.Name; got != "KmlFile" {
		t.Fatalf("Unmarshal: expected 'KmlFile' but found '%s'", got)
	}
	if open := doc.Open; !open {
		t.Fatal("Unmarshal: expected 'true' but found otherwise")
	}

	// Nested element: same checks one level down.
	nested := doc.B
	if got := nested.FieldB; got != "bar" {
		t.Fatalf("Unmarshal: expected 'bar' but found '%s'", got)
	}
	if got := nested.Name; got != "Absolute" {
		t.Fatalf("Unmarshal: expected 'Absolute' but found '%s'", got)
	}
	if open := nested.Open; open {
		t.Fatal("Unmarshal: expected 'false' but found otherwise")
	}
}
// A2 declares two fields whose names differ only in letter case.
// TestEmbedded2 expects neither of them to be set from an <xy> element.
type A2 struct {
XMLName Name `xml:"http://domain a"`
XY string
Xy string
}
// _2a is the input document shared by TestEmbedded2, TestEmbedded3 and
// TestEmbedded4: an <a> element with a single <xy> child.
const _2a = `
<?xml version="1.0" encoding="UTF-8"?>
<a xmlns="http://domain">
<xy>foo</xy>
</a>
`
// Tests that when two fields compete for the same element name,
// Unmarshal succeeds and leaves both of them empty.
func TestEmbedded2(t *testing.T) {
	var doc A2
	if err := Unmarshal(strings.NewReader(_2a), &doc); err != nil {
		t.Fatalf("Unmarshal: %s", err)
	}
	// Checked in declaration order: XY first, then Xy.
	for _, got := range []string{doc.XY, doc.Xy} {
		if got != "" {
			t.Fatalf("Unmarshal: expected empty string but found '%s'", got)
		}
	}
}
// A3 has only an unexported data field. TestEmbedded3 expects it to be
// left untouched by Unmarshal.
type A3 struct {
XMLName Name `xml:"http://domain a"`
xy string
}
// Tests that Unmarshal does not write to unexported fields, even when
// an element with the same name is present in the input.
func TestEmbedded3(t *testing.T) {
	doc := A3{}
	err := Unmarshal(strings.NewReader(_2a), &doc)
	switch {
	case err != nil:
		t.Fatalf("Unmarshal: %s", err)
	case doc.xy != "":
		t.Fatalf("Unmarshal: expected empty string but found '%s'", doc.xy)
	}
}
// A4 has a single data field named Any. TestEmbedded4 expects it to
// receive the content of the <xy> element in _2a.
type A4 struct {
XMLName Name `xml:"http://domain a"`
Any string
}
// Tests that a field named Any is filled from the <xy> element, for
// which A4 declares no same-named field.
func TestEmbedded4(t *testing.T) {
	doc := A4{}
	err := Unmarshal(strings.NewReader(_2a), &doc)
	switch {
	case err != nil:
		t.Fatalf("Unmarshal: %s", err)
	case doc.Any != "foo":
		t.Fatalf("Unmarshal: expected 'foo' but found '%s'", doc.Any)
	}
}
...@@ -6,6 +6,8 @@ package xml ...@@ -6,6 +6,8 @@ package xml
import ( import (
"bufio" "bufio"
"bytes"
"fmt"
"io" "io"
"reflect" "reflect"
"strconv" "strconv"
...@@ -42,20 +44,26 @@ type printer struct { ...@@ -42,20 +44,26 @@ type printer struct {
// elements containing the data. // elements containing the data.
// //
// The name for the XML elements is taken from, in order of preference: // The name for the XML elements is taken from, in order of preference:
// - the tag on an XMLName field, if the data is a struct // - the tag on the XMLName field, if the data is a struct
// - the value of an XMLName field of type xml.Name // - the value of the XMLName field of type xml.Name
// - the tag of the struct field used to obtain the data // - the tag of the struct field used to obtain the data
// - the name of the struct field used to obtain the data // - the name of the struct field used to obtain the data
// - the name '???'. // - the name of the marshalled type
// //
// The XML element for a struct contains marshalled elements for each of the // The XML element for a struct contains marshalled elements for each of the
// exported fields of the struct, with these exceptions: // exported fields of the struct, with these exceptions:
// - the XMLName field, described above, is omitted. // - the XMLName field, described above, is omitted.
// - a field with tag "attr" becomes an attribute in the XML element. // - a field with tag "name,attr" becomes an attribute with
// - a field with tag "chardata" is written as character data, // the given name in the XML element.
// not as an XML element. // - a field with tag ",attr" becomes an attribute with the
// - a field with tag "innerxml" is written verbatim, // field name in the XML element.
// not subject to the usual marshalling procedure. // - a field with tag ",chardata" is written as character data,
// not as an XML element.
// - a field with tag ",innerxml" is written verbatim, not subject
// to the usual marshalling procedure.
// - a field with tag ",comment" is written as an XML comment, not
// subject to the usual marshalling procedure. It must not contain
// the "--" string within it.
// //
// If a field uses a tag "a>b>c", then the element c will be nested inside // If a field uses a tag "a>b>c", then the element c will be nested inside
// parent elements a and b. Fields that appear next to each other that name // parent elements a and b. Fields that appear next to each other that name
...@@ -63,17 +71,18 @@ type printer struct { ...@@ -63,17 +71,18 @@ type printer struct {
// //
// type Result struct { // type Result struct {
// XMLName xml.Name `xml:"result"` // XMLName xml.Name `xml:"result"`
// Id int `xml:"id,attr"`
// FirstName string `xml:"person>name>first"` // FirstName string `xml:"person>name>first"`
// LastName string `xml:"person>name>last"` // LastName string `xml:"person>name>last"`
// Age int `xml:"person>age"` // Age int `xml:"person>age"`
// } // }
// //
// xml.Marshal(w, &Result{FirstName: "John", LastName: "Doe", Age: 42}) // xml.Marshal(w, &Result{Id: 13, FirstName: "John", LastName: "Doe", Age: 42})
// //
// would be marshalled as: // would be marshalled as:
// //
// <result> // <result>
// <person> // <person id="13">
// <name> // <name>
// <first>John</first> // <first>John</first>
// <last>Doe</last> // <last>Doe</last>
...@@ -85,12 +94,12 @@ type printer struct { ...@@ -85,12 +94,12 @@ type printer struct {
// Marshal will return an error if asked to marshal a channel, function, or map. // Marshal will return an error if asked to marshal a channel, function, or map.
func Marshal(w io.Writer, v interface{}) (err error) { func Marshal(w io.Writer, v interface{}) (err error) {
p := &printer{bufio.NewWriter(w)} p := &printer{bufio.NewWriter(w)}
err = p.marshalValue(reflect.ValueOf(v), "???") err = p.marshalValue(reflect.ValueOf(v), nil)
p.Flush() p.Flush()
return err return err
} }
func (p *printer) marshalValue(val reflect.Value, name string) error { func (p *printer) marshalValue(val reflect.Value, finfo *fieldInfo) error {
if !val.IsValid() { if !val.IsValid() {
return nil return nil
} }
...@@ -115,58 +124,75 @@ func (p *printer) marshalValue(val reflect.Value, name string) error { ...@@ -115,58 +124,75 @@ func (p *printer) marshalValue(val reflect.Value, name string) error {
if val.IsNil() { if val.IsNil() {
return nil return nil
} }
return p.marshalValue(val.Elem(), name) return p.marshalValue(val.Elem(), finfo)
} }
// Slices and arrays iterate over the elements. They do not have an enclosing tag. // Slices and arrays iterate over the elements. They do not have an enclosing tag.
if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 { if (kind == reflect.Slice || kind == reflect.Array) && typ.Elem().Kind() != reflect.Uint8 {
for i, n := 0, val.Len(); i < n; i++ { for i, n := 0, val.Len(); i < n; i++ {
if err := p.marshalValue(val.Index(i), name); err != nil { if err := p.marshalValue(val.Index(i), finfo); err != nil {
return err return err
} }
} }
return nil return nil
} }
// Find XML name tinfo, err := getTypeInfo(typ)
xmlns := "" if err != nil {
if kind == reflect.Struct { return err
if f, ok := typ.FieldByName("XMLName"); ok { }
if tag := f.Tag.Get("xml"); tag != "" {
if i := strings.Index(tag, " "); i >= 0 { // Precedence for the XML element name is:
xmlns, name = tag[:i], tag[i+1:] // 1. XMLName field in underlying struct;
} else { // 2. field name/tag in the struct field; and
name = tag // 3. type name
} var xmlns, name string
} else if v, ok := val.FieldByIndex(f.Index).Interface().(Name); ok && v.Local != "" { if tinfo.xmlname != nil {
xmlns, name = v.Space, v.Local xmlname := tinfo.xmlname
} if xmlname.name != "" {
xmlns, name = xmlname.xmlns, xmlname.name
} else if v, ok := val.FieldByIndex(xmlname.idx).Interface().(Name); ok && v.Local != "" {
xmlns, name = v.Space, v.Local
}
}
if name == "" && finfo != nil {
xmlns, name = finfo.xmlns, finfo.name
}
if name == "" {
name = typ.Name()
if name == "" {
return &UnsupportedTypeError{typ}
} }
} }
p.WriteByte('<') p.WriteByte('<')
p.WriteString(name) p.WriteString(name)
if xmlns != "" {
p.WriteString(` xmlns="`)
// TODO: EscapeString, to avoid the allocation.
Escape(p, []byte(xmlns))
p.WriteByte('"')
}
// Attributes // Attributes
if kind == reflect.Struct { for i := range tinfo.fields {
if len(xmlns) > 0 { finfo := &tinfo.fields[i]
p.WriteString(` xmlns="`) if finfo.flags&fAttr == 0 {
Escape(p, []byte(xmlns)) continue
p.WriteByte('"')
} }
var str string
for i, n := 0, typ.NumField(); i < n; i++ { if fv := val.FieldByIndex(finfo.idx); fv.Kind() == reflect.String {
if f := typ.Field(i); f.PkgPath == "" && f.Tag.Get("xml") == "attr" { str = fv.String()
if f.Type.Kind() == reflect.String { } else {
if str := val.Field(i).String(); str != "" { str = fmt.Sprint(fv.Interface())
p.WriteByte(' ') }
p.WriteString(strings.ToLower(f.Name)) if str != "" {
p.WriteString(`="`) p.WriteByte(' ')
Escape(p, []byte(str)) p.WriteString(finfo.name)
p.WriteByte('"') p.WriteString(`="`)
} Escape(p, []byte(str))
} p.WriteByte('"')
}
} }
} }
p.WriteByte('>') p.WriteByte('>')
...@@ -194,58 +220,9 @@ func (p *printer) marshalValue(val reflect.Value, name string) error { ...@@ -194,58 +220,9 @@ func (p *printer) marshalValue(val reflect.Value, name string) error {
bytes := val.Interface().([]byte) bytes := val.Interface().([]byte)
Escape(p, bytes) Escape(p, bytes)
case reflect.Struct: case reflect.Struct:
s := parentStack{printer: p} if err := p.marshalStruct(tinfo, val); err != nil {
for i, n := 0, val.NumField(); i < n; i++ { return err
if f := typ.Field(i); f.Name != "XMLName" && f.PkgPath == "" {
name := f.Name
vf := val.Field(i)
switch tag := f.Tag.Get("xml"); tag {
case "":
s.trim(nil)
case "chardata":
if tk := f.Type.Kind(); tk == reflect.String {
Escape(p, []byte(vf.String()))
} else if tk == reflect.Slice {
if elem, ok := vf.Interface().([]byte); ok {
Escape(p, elem)
}
}
continue
case "innerxml":
iface := vf.Interface()
switch raw := iface.(type) {
case []byte:
p.Write(raw)
continue
case string:
p.WriteString(raw)
continue
}
case "attr":
continue
default:
parents := strings.Split(tag, ">")
if len(parents) == 1 {
parents, name = nil, tag
} else {
parents, name = parents[:len(parents)-1], parents[len(parents)-1]
if parents[0] == "" {
parents[0] = f.Name
}
}
s.trim(parents)
if !(vf.Kind() == reflect.Ptr || vf.Kind() == reflect.Interface) || !vf.IsNil() {
s.push(parents[len(s.stack):])
}
}
if err := p.marshalValue(vf, name); err != nil {
return err
}
}
} }
s.trim(nil)
default: default:
return &UnsupportedTypeError{typ} return &UnsupportedTypeError{typ}
} }
...@@ -258,6 +235,94 @@ func (p *printer) marshalValue(val reflect.Value, name string) error { ...@@ -258,6 +235,94 @@ func (p *printer) marshalValue(val reflect.Value, name string) error {
return nil return nil
} }
var ddBytes = []byte("--")
func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
s := parentStack{printer: p}
for i := range tinfo.fields {
finfo := &tinfo.fields[i]
if finfo.flags&(fAttr|fAny) != 0 {
continue
}
vf := val.FieldByIndex(finfo.idx)
switch finfo.flags & fMode {
case fCharData:
switch vf.Kind() {
case reflect.String:
Escape(p, []byte(vf.String()))
case reflect.Slice:
if elem, ok := vf.Interface().([]byte); ok {
Escape(p, elem)
}
}
continue
case fComment:
k := vf.Kind()
if !(k == reflect.String || k == reflect.Slice && vf.Type().Elem().Kind() == reflect.Uint8) {
return fmt.Errorf("xml: bad type for comment field of %s", val.Type())
}
if vf.Len() == 0 {
continue
}
p.WriteString("<!--")
dashDash := false
dashLast := false
switch k {
case reflect.String:
s := vf.String()
dashDash = strings.Index(s, "--") >= 0
dashLast = s[len(s)-1] == '-'
if !dashDash {
p.WriteString(s)
}
case reflect.Slice:
b := vf.Bytes()
dashDash = bytes.Index(b, ddBytes) >= 0
dashLast = b[len(b)-1] == '-'
if !dashDash {
p.Write(b)
}
default:
panic("can't happen")
}
if dashDash {
return fmt.Errorf(`xml: comments must not contain "--"`)
}
if dashLast {
// "--->" is invalid grammar. Make it "- -->"
p.WriteByte(' ')
}
p.WriteString("-->")
continue
case fInnerXml:
iface := vf.Interface()
switch raw := iface.(type) {
case []byte:
p.Write(raw)
continue
case string:
p.WriteString(raw)
continue
}
case fElement:
s.trim(finfo.parents)
if len(finfo.parents) > len(s.stack) {
if vf.Kind() != reflect.Ptr && vf.Kind() != reflect.Interface || !vf.IsNil() {
s.push(finfo.parents[len(s.stack):])
}
}
}
if err := p.marshalValue(vf, finfo); err != nil {
return err
}
}
s.trim(nil)
return nil
}
type parentStack struct { type parentStack struct {
*printer *printer
stack []string stack []string
......
This diff is collapsed.
This diff is collapsed.
...@@ -25,8 +25,8 @@ func TestUnmarshalFeed(t *testing.T) { ...@@ -25,8 +25,8 @@ func TestUnmarshalFeed(t *testing.T) {
// hget http://codereview.appspot.com/rss/mine/rsc // hget http://codereview.appspot.com/rss/mine/rsc
const atomFeedString = ` const atomFeedString = `
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld&lt;&gt;</name></author><entry><title>rietveld: an attempt at pubsubhubbub <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld&lt;&gt;</name></author><entry><title>rietveld: an attempt at pubsubhubbub
</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html"> </title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
An attempt at adding pubsubhubbub support to Rietveld. An attempt at adding pubsubhubbub support to Rietveld.
http://code.google.com/p/pubsubhubbub http://code.google.com/p/pubsubhubbub
http://code.google.com/p/rietveld/issues/detail?id=155 http://code.google.com/p/rietveld/issues/detail?id=155
...@@ -79,39 +79,39 @@ not being used from outside intra_region_diff.py. ...@@ -79,39 +79,39 @@ not being used from outside intra_region_diff.py.
</summary></entry></feed> ` </summary></entry></feed> `
type Feed struct { type Feed struct {
XMLName Name `xml:"http://www.w3.org/2005/Atom feed"` XMLName Name `xml:"http://www.w3.org/2005/Atom feed"`
Title string Title string `xml:"title"`
Id string Id string `xml:"id"`
Link []Link Link []Link `xml:"link"`
Updated Time Updated Time `xml:"updated"`
Author Person Author Person `xml:"author"`
Entry []Entry Entry []Entry `xml:"entry"`
} }
type Entry struct { type Entry struct {
Title string Title string `xml:"title"`
Id string Id string `xml:"id"`
Link []Link Link []Link `xml:"link"`
Updated Time Updated Time `xml:"updated"`
Author Person Author Person `xml:"author"`
Summary Text Summary Text `xml:"summary"`
} }
type Link struct { type Link struct {
Rel string `xml:"attr"` Rel string `xml:"rel,attr"`
Href string `xml:"attr"` Href string `xml:"href,attr"`
} }
type Person struct { type Person struct {
Name string Name string `xml:"name"`
URI string URI string `xml:"uri"`
Email string Email string `xml:"email"`
InnerXML string `xml:"innerxml"` InnerXML string `xml:",innerxml"`
} }
type Text struct { type Text struct {
Type string `xml:"attr"` Type string `xml:"type,attr"`
Body string `xml:"chardata"` Body string `xml:",chardata"`
} }
type Time string type Time string
...@@ -214,44 +214,26 @@ not being used from outside intra_region_diff.py. ...@@ -214,44 +214,26 @@ not being used from outside intra_region_diff.py.
}, },
} }
type FieldNameTest struct {
in, out string
}
var FieldNameTests = []FieldNameTest{
{"Profile-Image", "profileimage"},
{"_score", "score"},
}
func TestFieldName(t *testing.T) {
for _, tt := range FieldNameTests {
a := fieldName(tt.in)
if a != tt.out {
t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out)
}
}
}
const pathTestString = ` const pathTestString = `
<result> <Result>
<before>1</before> <Before>1</Before>
<items> <Items>
<item1> <Item1>
<value>A</value> <Value>A</Value>
</item1> </Item1>
<item2> <Item2>
<value>B</value> <Value>B</Value>
</item2> </Item2>
<Item1> <Item1>
<Value>C</Value> <Value>C</Value>
<Value>D</Value> <Value>D</Value>
</Item1> </Item1>
<_> <_>
<value>E</value> <Value>E</Value>
</_> </_>
</items> </Items>
<after>2</after> <After>2</After>
</result> </Result>
` `
type PathTestItem struct { type PathTestItem struct {
...@@ -259,18 +241,18 @@ type PathTestItem struct { ...@@ -259,18 +241,18 @@ type PathTestItem struct {
} }
type PathTestA struct { type PathTestA struct {
Items []PathTestItem `xml:">item1"` Items []PathTestItem `xml:">Item1"`
Before, After string Before, After string
} }
type PathTestB struct { type PathTestB struct {
Other []PathTestItem `xml:"items>Item1"` Other []PathTestItem `xml:"Items>Item1"`
Before, After string Before, After string
} }
type PathTestC struct { type PathTestC struct {
Values1 []string `xml:"items>item1>value"` Values1 []string `xml:"Items>Item1>Value"`
Values2 []string `xml:"items>item2>value"` Values2 []string `xml:"Items>Item2>Value"`
Before, After string Before, After string
} }
...@@ -279,12 +261,12 @@ type PathTestSet struct { ...@@ -279,12 +261,12 @@ type PathTestSet struct {
} }
type PathTestD struct { type PathTestD struct {
Other PathTestSet `xml:"items>"` Other PathTestSet `xml:"Items"`
Before, After string Before, After string
} }
type PathTestE struct { type PathTestE struct {
Underline string `xml:"items>_>value"` Underline string `xml:"Items>_>Value"`
Before, After string Before, After string
} }
...@@ -311,7 +293,7 @@ func TestUnmarshalPaths(t *testing.T) { ...@@ -311,7 +293,7 @@ func TestUnmarshalPaths(t *testing.T) {
type BadPathTestA struct { type BadPathTestA struct {
First string `xml:"items>item1"` First string `xml:"items>item1"`
Other string `xml:"items>item2"` Other string `xml:"items>item2"`
Second string `xml:"items>"` Second string `xml:"items"`
} }
type BadPathTestB struct { type BadPathTestB struct {
...@@ -320,76 +302,50 @@ type BadPathTestB struct { ...@@ -320,76 +302,50 @@ type BadPathTestB struct {
Second string `xml:"items>item1>value"` Second string `xml:"items>item1>value"`
} }
type BadPathTestC struct {
First string
Second string `xml:"First"`
}
type BadPathTestD struct {
BadPathEmbeddedA
BadPathEmbeddedB
}
type BadPathEmbeddedA struct {
First string
}
type BadPathEmbeddedB struct {
Second string `xml:"First"`
}
var badPathTests = []struct { var badPathTests = []struct {
v, e interface{} v, e interface{}
}{ }{
{&BadPathTestA{}, &TagPathError{reflect.TypeOf(BadPathTestA{}), "First", "items>item1", "Second", "items>"}}, {&BadPathTestA{}, &TagPathError{reflect.TypeOf(BadPathTestA{}), "First", "items>item1", "Second", "items"}},
{&BadPathTestB{}, &TagPathError{reflect.TypeOf(BadPathTestB{}), "First", "items>item1", "Second", "items>item1>value"}}, {&BadPathTestB{}, &TagPathError{reflect.TypeOf(BadPathTestB{}), "First", "items>item1", "Second", "items>item1>value"}},
{&BadPathTestC{}, &TagPathError{reflect.TypeOf(BadPathTestC{}), "First", "", "Second", "First"}},
{&BadPathTestD{}, &TagPathError{reflect.TypeOf(BadPathTestD{}), "First", "", "Second", "First"}},
} }
func TestUnmarshalBadPaths(t *testing.T) { func TestUnmarshalBadPaths(t *testing.T) {
for _, tt := range badPathTests { for _, tt := range badPathTests {
err := Unmarshal(strings.NewReader(pathTestString), tt.v) err := Unmarshal(strings.NewReader(pathTestString), tt.v)
if !reflect.DeepEqual(err, tt.e) { if !reflect.DeepEqual(err, tt.e) {
t.Fatalf("Unmarshal with %#v didn't fail properly: %#v", tt.v, err) t.Fatalf("Unmarshal with %#v didn't fail properly:\nhave %#v,\nwant %#v", tt.v, err, tt.e)
} }
} }
} }
func TestUnmarshalAttrs(t *testing.T) {
var f AttrTest
if err := Unmarshal(strings.NewReader(attrString), &f); err != nil {
t.Fatalf("Unmarshal: %s", err)
}
if !reflect.DeepEqual(f, attrStruct) {
t.Fatalf("have %#v\nwant %#v", f, attrStruct)
}
}
type AttrTest struct {
Test1 Test1
Test2 Test2
}
type Test1 struct {
Int int `xml:"attr"`
Float float64 `xml:"attr"`
Uint8 uint8 `xml:"attr"`
}
type Test2 struct {
Bool bool `xml:"attr"`
}
const attrString = `
<?xml version="1.0" charset="utf-8"?>
<attrtest>
<test1 int="8" float="23.5" uint8="255"/>
<test2 bool="true"/>
</attrtest>
`
var attrStruct = AttrTest{
Test1: Test1{
Int: 8,
Float: 23.5,
Uint8: 255,
},
Test2: Test2{
Bool: true,
},
}
// test data for TestUnmarshalWithoutNameType
const OK = "OK" const OK = "OK"
const withoutNameTypeData = ` const withoutNameTypeData = `
<?xml version="1.0" charset="utf-8"?> <?xml version="1.0" charset="utf-8"?>
<Test3 attr="OK" />` <Test3 Attr="OK" />`
type TestThree struct { type TestThree struct {
XMLName bool `xml:"Test3"` // XMLName field without an xml.Name type XMLName Name `xml:"Test3"`
Attr string `xml:"attr"` Attr string `xml:",attr"`
} }
func TestUnmarshalWithoutNameType(t *testing.T) { func TestUnmarshalWithoutNameType(t *testing.T) {
......
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package xml
import (
"fmt"
"reflect"
"strings"
"sync"
)
// typeInfo holds details for the xml representation of a type.
// It is built by getTypeInfo.
type typeInfo struct {
// xmlname describes the type's XMLName field; it stays nil when the
// type has no such field.
xmlname *fieldInfo
// fields lists the remaining exported fields, including those taken
// from embedded structs, that were accepted by addFieldInfo.
fields []fieldInfo
}
// fieldInfo holds details for the xml representation of a single field.
type fieldInfo struct {
// idx is the index path of the field, suitable for
// reflect.Value.FieldByIndex on a value of the owning struct type.
idx []int
// name is the XML name for the field: the last segment of the tag
// path, or a default chosen by structFieldInfo when the tag has none.
name string
// xmlns is the namespace given in the tag before the first space,
// or the empty string.
xmlns string
// flags records how the field is represented (element, attribute,
// character data, and so on).
flags fieldFlags
// parents holds the enclosing element names from an "a>b>c" style
// tag, outermost first; nil when the tag has a single segment.
parents []string
}
// fieldFlags is a bit set describing how a struct field maps to XML.
type fieldFlags int
const (
// fElement marks a field written as a child element. It is the mode
// used when the tag names no other mode.
fElement fieldFlags = 1 << iota
// fAttr is set by the ",attr" flag: the field is an attribute.
fAttr
// fCharData is set by the ",chardata" flag: the field is character data.
fCharData
// fInnerXml is set by the ",innerxml" flag: the field is raw inner XML.
fInnerXml
// fComment is set by the ",comment" flag: the field is an XML comment.
fComment
// fAny is set by the ",any" flag.
fAny
// TODO:
//fIgnore
//fOmitEmpty
// fMode masks all of the mode bits above; a valid field has exactly
// one of them set.
fMode = fElement | fAttr | fCharData | fInnerXml | fComment | fAny
)
// tinfoMap caches the typeInfo computed by getTypeInfo, keyed by type.
var tinfoMap = make(map[reflect.Type]*typeInfo)
// tinfoLock is held by getTypeInfo around every read and write of tinfoMap.
var tinfoLock sync.RWMutex
// getTypeInfo returns the typeInfo describing how typ is marshalled and
// unmarshalled. The result is looked up in, and stored into, tinfoMap
// under tinfoLock, so each type is normally analysed only once.
//
// For struct types every exported field is examined: the XMLName field
// is recorded separately, fields of embedded structs are folded into
// the outer type, and everything else goes through addFieldInfo. Any
// error from structFieldInfo or addFieldInfo is returned unchanged and
// nothing is cached. Non-struct types get an empty typeInfo.
func getTypeInfo(typ reflect.Type) (*typeInfo, error) {
	tinfoLock.RLock()
	cached, hit := tinfoMap[typ]
	tinfoLock.RUnlock()
	if hit {
		return cached, nil
	}

	result := new(typeInfo)

	// Only structs have fields to walk; everything else keeps count 0.
	numFields := 0
	if typ.Kind() == reflect.Struct {
		numFields = typ.NumField()
	}

	for i := 0; i < numFields; i++ {
		sf := typ.Field(i)
		switch {
		case sf.PkgPath != "":
			// Private field: nothing to record.

		case !sf.Anonymous:
			info, err := structFieldInfo(typ, &sf)
			if err != nil {
				return nil, err
			}
			if sf.Name == "XMLName" {
				result.xmlname = info
			} else if err := addFieldInfo(typ, result, info); err != nil {
				// The field conflicts with one added earlier.
				return nil, err
			}

		case sf.Type.Kind() == reflect.Struct:
			// Embedded struct: adopt its fields, prefixing each index
			// path with this field's position in the outer struct.
			inner, err := getTypeInfo(sf.Type)
			if err != nil {
				return nil, err
			}
			for _, promoted := range inner.fields {
				promoted.idx = append([]int{i}, promoted.idx...)
				if err := addFieldInfo(typ, result, &promoted); err != nil {
					return nil, err
				}
			}
		}
	}

	tinfoLock.Lock()
	tinfoMap[typ] = result
	tinfoLock.Unlock()
	return result, nil
}
// structFieldInfo builds and returns a fieldInfo for f.
//
// typ is the struct type that declares f; it is used only to build
// error messages. The field's "xml" tag is read as
// "[namespace ]name[,flag...]", where name may be a '>'-separated path
// whose last segment is the XML name and whose earlier segments are the
// parent elements.
//
// A nil fieldInfo and an error are returned when the tag combines
// flags in an unsupported way, gives a namespace without a name, ends
// with '>', or names an element differently from the XMLName of the
// field's own type.
func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, error) {
finfo := &fieldInfo{idx: f.Index}
// Split the tag from the xml namespace if necessary.
tag := f.Tag.Get("xml")
if i := strings.Index(tag, " "); i >= 0 {
finfo.xmlns, tag = tag[:i], tag[i+1:]
}
// Parse flags.
tokens := strings.Split(tag, ",")
if len(tokens) == 1 {
// No comma in the tag: the whole tag is the name of a plain element.
finfo.flags = fElement
} else {
tag = tokens[0]
// Flags that are not listed below are silently ignored.
for _, flag := range tokens[1:] {
switch flag {
case "attr":
finfo.flags |= fAttr
case "chardata":
finfo.flags |= fCharData
case "innerxml":
finfo.flags |= fInnerXml
case "comment":
finfo.flags |= fComment
case "any":
finfo.flags |= fAny
}
}
// Validate the flags used.
switch mode := finfo.flags & fMode; mode {
case 0:
finfo.flags |= fElement
case fAttr, fCharData, fInnerXml, fComment, fAny:
// A single non-element mode is accepted only on a field other
// than XMLName, and only without a name, except for attr,
// which may carry one. Anything else falls through to the error.
if f.Name != "XMLName" && (tag == "" || mode == fAttr) {
break
}
fallthrough
default:
// This will also catch multiple modes in a single field.
return nil, fmt.Errorf("xml: invalid tag in field %s of type %s: %q",
f.Name, typ, f.Tag.Get("xml"))
}
}
// Use of xmlns without a name is not allowed.
if finfo.xmlns != "" && tag == "" {
return nil, fmt.Errorf("xml: namespace without name in field %s of type %s: %q",
f.Name, typ, f.Tag.Get("xml"))
}
if f.Name == "XMLName" {
// The XMLName field records the XML element name. Don't
// process it as usual because its name should default to
// empty rather than to the field name.
finfo.name = tag
return finfo, nil
}
if tag == "" {
// If the name part of the tag is completely empty, get
// default from XMLName of underlying struct if feasible,
// or field name otherwise.
if xmlname := lookupXMLName(f.Type); xmlname != nil {
finfo.xmlns, finfo.name = xmlname.xmlns, xmlname.name
} else {
finfo.name = f.Name
}
return finfo, nil
}
// Prepare field name and parents.
tokens = strings.Split(tag, ">")
// A leading '>' (empty first segment) stands for the field name.
if tokens[0] == "" {
tokens[0] = f.Name
}
if tokens[len(tokens)-1] == "" {
return nil, fmt.Errorf("xml: trailing '>' in field %s of type %s", f.Name, typ)
}
finfo.name = tokens[len(tokens)-1]
if len(tokens) > 1 {
finfo.parents = tokens[:len(tokens)-1]
}
// If the field type has an XMLName field, the names must match
// so that the behavior of both marshalling and unmarshalling
// is straightforward and unambiguous.
if finfo.flags&fElement != 0 {
ftyp := f.Type
xmlname := lookupXMLName(ftyp)
if xmlname != nil && xmlname.name != finfo.name {
return nil, fmt.Errorf("xml: name %q in tag of %s.%s conflicts with name %q in %s.XMLName",
finfo.name, typ, f.Name, xmlname.name, ftyp)
}
}
return finfo, nil
}
// lookupXMLName returns the fieldInfo for typ's XMLName field
// in case it exists and has a valid xml field tag, otherwise
// it returns nil. Pointer types are dereferenced until a
// non-pointer type is reached, and any type that is not a
// struct yields nil.
func lookupXMLName(typ reflect.Type) (xmlname *fieldInfo) {
	for typ.Kind() == reflect.Ptr {
		typ = typ.Elem()
	}
	if typ.Kind() != reflect.Struct {
		return nil
	}
	for i, n := 0, typ.NumField(); i < n; i++ {
		f := typ.Field(i)
		if f.Name != "XMLName" {
			continue
		}
		finfo, err := structFieldInfo(typ, &f)
		// Check err before touching finfo: structFieldInfo returns a
		// nil *fieldInfo together with a non-nil error, so reading
		// finfo.name first would dereference nil and panic.
		if err == nil && finfo.name != "" {
			return finfo
		}
		// Also consider errors as a non-existent field tag
		// and let getTypeInfo itself report the error.
		break
	}
	return nil
}
// min reports the smaller of the two ints a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// addFieldInfo records newf in tinfo.fields unless doing so would clash
// with a field that is already there. Two fields of the same mode clash
// when their full paths (parents followed by name) are identical, or
// when one full path is a prefix of the other; paths that merely share
// a shorter leading portion do not clash.
//
// Clashes are settled by embedding depth, measured by the length of the
// field index: if an existing clashing field is shallower, newf is
// silently discarded; if one sits at the same depth, a *TagPathError is
// returned; otherwise newf is shallower than all of them, so they are
// removed from tinfo.fields and newf is appended.
func addFieldInfo(typ reflect.Type, tinfo *typeInfo, newf *fieldInfo) error {
	var clashes []int
	newDepth := len(newf.parents)

	// Collect the indexes of every clashing field. Most working code
	// will have none.
Scan:
	for i := range tinfo.fields {
		cur := &tinfo.fields[i]
		if cur.flags&fMode != newf.flags&fMode {
			continue
		}
		curDepth := len(cur.parents)
		shared := min(newDepth, curDepth)
		for p := 0; p < shared; p++ {
			if cur.parents[p] != newf.parents[p] {
				continue Scan
			}
		}
		// The common parents agree; the clash is decided by whatever
		// element follows them on the shorter of the two paths.
		var clash bool
		switch {
		case curDepth > newDepth:
			clash = cur.parents[newDepth] == newf.name
		case curDepth < newDepth:
			clash = newf.parents[curDepth] == cur.name
		default:
			clash = newf.name == cur.name
		}
		if clash {
			clashes = append(clashes, i)
		}
	}

	// Nothing clashes: simply add the new field.
	if len(clashes) == 0 {
		tinfo.fields = append(tinfo.fields, *newf)
		return nil
	}

	// An existing shallower field wins outright, matching the Go field
	// resolution on embedding, so every clash is inspected for that
	// before the first same-depth clash is reported as an error.
	sameDepth := -1
	for _, i := range clashes {
		switch d := len(tinfo.fields[i].idx); {
		case d < len(newf.idx):
			return nil
		case d == len(newf.idx) && sameDepth < 0:
			sameDepth = i
		}
	}
	if sameDepth >= 0 {
		f1 := typ.FieldByIndex(tinfo.fields[sameDepth].idx)
		f2 := typ.FieldByIndex(newf.idx)
		return &TagPathError{typ, f1.Name, f1.Tag.Get("xml"), f2.Name, f2.Tag.Get("xml")}
	}

	// The new field is shallower than every clashing one and takes
	// precedence. Remove the clashing entries from the back so that
	// the earlier recorded indexes stay valid, then append it.
	for c := len(clashes) - 1; c >= 0; c-- {
		i := clashes[c]
		tinfo.fields = append(tinfo.fields[:i], tinfo.fields[i+1:]...)
	}
	tinfo.fields = append(tinfo.fields, *newf)
	return nil
}
// TagPathError is the error reported when two fields of a struct carry
// xml field tags whose paths conflict with each other.
type TagPathError struct {
	Struct reflect.Type // struct type that holds both fields
	Field1 string       // name of the first conflicting field
	Tag1   string       // xml tag of the first conflicting field
	Field2 string       // name of the second conflicting field
	Tag2   string       // xml tag of the second conflicting field
}

// Error describes the conflict, naming the struct type followed by both
// fields and their quoted xml tags.
func (e *TagPathError) Error() string {
	const format = "%s field %q with tag %q conflicts with field %q with tag %q"
	return fmt.Sprintf(format, e.Struct, e.Field1, e.Tag1, e.Field2, e.Tag2)
}
...@@ -344,26 +344,26 @@ var all = allScalars{ ...@@ -344,26 +344,26 @@ var all = allScalars{
var sixteen = "16" var sixteen = "16"
const testScalarsInput = `<allscalars> const testScalarsInput = `<allscalars>
<true1>true</true1> <True1>true</True1>
<true2>1</true2> <True2>1</True2>
<false1>false</false1> <False1>false</False1>
<false2>0</false2> <False2>0</False2>
<int>1</int> <Int>1</Int>
<int8>-2</int8> <Int8>-2</Int8>
<int16>3</int16> <Int16>3</Int16>
<int32>-4</int32> <Int32>-4</Int32>
<int64>5</int64> <Int64>5</Int64>
<uint>6</uint> <Uint>6</Uint>
<uint8>7</uint8> <Uint8>7</Uint8>
<uint16>8</uint16> <Uint16>8</Uint16>
<uint32>9</uint32> <Uint32>9</Uint32>
<uint64>10</uint64> <Uint64>10</Uint64>
<uintptr>11</uintptr> <Uintptr>11</Uintptr>
<float>12.0</float> <Float>12.0</Float>
<float32>13.0</float32> <Float32>13.0</Float32>
<float64>14.0</float64> <Float64>14.0</Float64>
<string>15</string> <String>15</String>
<ptrstring>16</ptrstring> <PtrString>16</PtrString>
</allscalars>` </allscalars>`
func TestAllScalars(t *testing.T) { func TestAllScalars(t *testing.T) {
...@@ -384,7 +384,7 @@ type item struct { ...@@ -384,7 +384,7 @@ type item struct {
} }
func TestIssue569(t *testing.T) { func TestIssue569(t *testing.T) {
data := `<item><field_a>abcd</field_a></item>` data := `<item><Field_a>abcd</Field_a></item>`
var i item var i item
buf := bytes.NewBufferString(data) buf := bytes.NewBufferString(data)
err := Unmarshal(buf, &i) err := Unmarshal(buf, &i)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment