Commit 1916db78 authored by Nigel Tao

html: make the low-level tokenizer also skip end-tag attributes.

R=andybalholm
CC=golang-dev
https://golang.org/cl/6453071
parent 37d7500f
...@@ -691,7 +691,7 @@ loop: ...@@ -691,7 +691,7 @@ loop:
// readStartTag reads the next start tag token. The opening "<a" has already // readStartTag reads the next start tag token. The opening "<a" has already
// been consumed, where 'a' means anything in [A-Za-z]. // been consumed, where 'a' means anything in [A-Za-z].
func (z *Tokenizer) readStartTag() TokenType { func (z *Tokenizer) readStartTag() TokenType {
z.readTag() z.readTag(true)
if z.err != nil && len(z.attr) == 0 { if z.err != nil && len(z.attr) == 0 {
return ErrorToken return ErrorToken
} }
...@@ -724,9 +724,11 @@ func (z *Tokenizer) readStartTag() TokenType { ...@@ -724,9 +724,11 @@ func (z *Tokenizer) readStartTag() TokenType {
return StartTagToken return StartTagToken
} }
// readTag reads the next tag token. The opening "<a" or "</a" has already been // readTag reads the next tag token and its attributes. If saveAttr, those
// consumed, where 'a' means anything in [A-Za-z]. // attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
func (z *Tokenizer) readTag() { // The opening "<a" or "</a" has already been consumed, where 'a' means anything
// in [A-Za-z].
func (z *Tokenizer) readTag(saveAttr bool) {
z.attr = z.attr[:0] z.attr = z.attr[:0]
z.nAttrReturned = 0 z.nAttrReturned = 0
// Read the tag name and attribute key/value pairs. // Read the tag name and attribute key/value pairs.
...@@ -742,8 +744,8 @@ func (z *Tokenizer) readTag() { ...@@ -742,8 +744,8 @@ func (z *Tokenizer) readTag() {
z.raw.end-- z.raw.end--
z.readTagAttrKey() z.readTagAttrKey()
z.readTagAttrVal() z.readTagAttrVal()
// Save pendingAttr if it has a non-empty key. // Save pendingAttr if saveAttr and that attribute has a non-empty key.
if z.pendingAttr[0].start != z.pendingAttr[0].end { if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
z.attr = append(z.attr, z.pendingAttr) z.attr = append(z.attr, z.pendingAttr)
} }
if z.skipWhiteSpace(); z.err != nil { if z.skipWhiteSpace(); z.err != nil {
...@@ -945,7 +947,7 @@ loop: ...@@ -945,7 +947,7 @@ loop:
continue loop continue loop
} }
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
z.readTag() z.readTag(false)
z.tt = EndTagToken z.tt = EndTagToken
return z.tt return z.tt
} }
...@@ -1078,16 +1080,11 @@ func (z *Tokenizer) Token() Token { ...@@ -1078,16 +1080,11 @@ func (z *Tokenizer) Token() Token {
t.Data = string(z.Text()) t.Data = string(z.Text())
case StartTagToken, SelfClosingTagToken, EndTagToken: case StartTagToken, SelfClosingTagToken, EndTagToken:
name, moreAttr := z.TagName() name, moreAttr := z.TagName()
// Since end tags should not have attributes, the high-level tokenizer
// interface will not return attributes for an end tag token even if
// it looks like </br foo="bar">.
if z.tt != EndTagToken {
for moreAttr { for moreAttr {
var key, val []byte var key, val []byte
key, val, moreAttr = z.TagAttr() key, val, moreAttr = z.TagAttr()
t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)}) t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
} }
}
if a := atom.Lookup(name); a != 0 { if a := atom.Lookup(name); a != 0 {
t.DataAtom, t.Data = a, a.String() t.DataAtom, t.Data = a, a.String()
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment