Commit b91aea55 authored by Russ Cox

encoding/xml: add InputOffset method to Decoder

Among other things, this allows users to match the decoded
pieces with the original XML, which can be necessary for
implementing standards like XML signatures.

Fixes #8484.

LGTM=bradfitz
R=bradfitz
CC=golang-codereviews
https://golang.org/cl/122960043
parent fef54b22
...@@ -29,6 +29,7 @@ import ( ...@@ -29,6 +29,7 @@ import (
type SyntaxError struct { type SyntaxError struct {
Msg string Msg string
Line int Line int
Byte int64 // byte offset from start of stream
} }
func (e *SyntaxError) Error() string { func (e *SyntaxError) Error() string {
...@@ -196,6 +197,7 @@ type Decoder struct { ...@@ -196,6 +197,7 @@ type Decoder struct {
ns map[string]string ns map[string]string
err error err error
line int line int
offset int64
unmarshalDepth int unmarshalDepth int
} }
...@@ -859,9 +861,17 @@ func (d *Decoder) getc() (b byte, ok bool) { ...@@ -859,9 +861,17 @@ func (d *Decoder) getc() (b byte, ok bool) {
if b == '\n' { if b == '\n' {
d.line++ d.line++
} }
d.offset++
return b, true return b, true
} }
// InputOffset returns the input stream byte offset of the current decoder position.
// The offset gives the location of the end of the most recently returned token
// and the beginning of the next token.
//
// The value is the decoder's running byte counter: it is advanced by one for
// every byte handed out by getc and moved back by one when a byte is pushed
// back with ungetc, so a byte that was read ahead and then returned to the
// input is not counted.
//
// NOTE(review): the accompanying test allows the offset to stay unchanged
// across a token, noting that an EndElement can be synthesized; callers
// should presumably not assume every token covers a non-empty input range.
func (d *Decoder) InputOffset() int64 {
return d.offset
}
// Return saved offset. // Return saved offset.
// If we did ungetc (nextByte >= 0), have to back up one. // If we did ungetc (nextByte >= 0), have to back up one.
func (d *Decoder) savedOffset() int { func (d *Decoder) savedOffset() int {
...@@ -891,6 +901,7 @@ func (d *Decoder) ungetc(b byte) { ...@@ -891,6 +901,7 @@ func (d *Decoder) ungetc(b byte) {
d.line-- d.line--
} }
d.nextByte = int(b) d.nextByte = int(b)
d.offset--
} }
var entity = map[string]int{ var entity = map[string]int{
......
...@@ -170,7 +170,7 @@ var xmlInput = []string{ ...@@ -170,7 +170,7 @@ var xmlInput = []string{
func TestRawToken(t *testing.T) { func TestRawToken(t *testing.T) {
d := NewDecoder(strings.NewReader(testInput)) d := NewDecoder(strings.NewReader(testInput))
d.Entity = testEntity d.Entity = testEntity
testRawToken(t, d, rawTokens) testRawToken(t, d, testInput, rawTokens)
} }
const nonStrictInput = ` const nonStrictInput = `
...@@ -225,7 +225,7 @@ var nonStrictTokens = []Token{ ...@@ -225,7 +225,7 @@ var nonStrictTokens = []Token{
func TestNonStrictRawToken(t *testing.T) { func TestNonStrictRawToken(t *testing.T) {
d := NewDecoder(strings.NewReader(nonStrictInput)) d := NewDecoder(strings.NewReader(nonStrictInput))
d.Strict = false d.Strict = false
testRawToken(t, d, nonStrictTokens) testRawToken(t, d, nonStrictInput, nonStrictTokens)
} }
type downCaser struct { type downCaser struct {
...@@ -254,7 +254,7 @@ func TestRawTokenAltEncoding(t *testing.T) { ...@@ -254,7 +254,7 @@ func TestRawTokenAltEncoding(t *testing.T) {
} }
return &downCaser{t, input.(io.ByteReader)}, nil return &downCaser{t, input.(io.ByteReader)}, nil
} }
testRawToken(t, d, rawTokensAltEncoding) testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
} }
func TestRawTokenAltEncodingNoConverter(t *testing.T) { func TestRawTokenAltEncodingNoConverter(t *testing.T) {
...@@ -280,9 +280,12 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) { ...@@ -280,9 +280,12 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) {
} }
} }
func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) { func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
lastEnd := int64(0)
for i, want := range rawTokens { for i, want := range rawTokens {
start := d.InputOffset()
have, err := d.RawToken() have, err := d.RawToken()
end := d.InputOffset()
if err != nil { if err != nil {
t.Fatalf("token %d: unexpected error: %s", i, err) t.Fatalf("token %d: unexpected error: %s", i, err)
} }
...@@ -300,6 +303,26 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) { ...@@ -300,6 +303,26 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
} }
t.Errorf("token %d = %s, want %s", i, shave, swant) t.Errorf("token %d = %s, want %s", i, shave, swant)
} }
// Check that InputOffset returned actual token.
switch {
case start < lastEnd:
t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
case start >= end:
// Special case: EndElement can be synthesized.
if start == end && end == lastEnd {
break
}
t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
case end > int64(len(raw)):
t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
default:
text := raw[start:end]
if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
}
}
lastEnd = end
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment