html: make the low-level tokenizer also skip end-tag attributes.

R=andybalholm
CC=golang-dev
https://golang.org/cl/6453071

html: make the low-level tokenizer also skip end-tag attributes.
R=andybalholm
CC=golang-dev
https://golang.org/cl/6453071
1916db78 · Nigel Tao · 37d7500f · 1916db78
Commit 1916db78 authored Aug 03, 2012 by Nigel Tao
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 16 deletions

src/pkg/exp/html/token.go src/pkg/exp/html/token.go +13 -16

No files found.
--- a/src/pkg/exp/html/token.go
+++ b/src/pkg/exp/html/token.go
@@ -691,7 +691,7 @@ loop:
 // readStartTag reads the next start tag token. The opening "<a" has already
 // been consumed, where 'a' means anything in [A-Za-z].
 func (z *Tokenizer) readStartTag() TokenType {
-	z.readTag()
+	z.readTag(true)
 	if z.err != nil && len(z.attr) == 0 {
 		return ErrorToken
 	}
@@ -724,9 +724,11 @@ func (z *Tokenizer) readStartTag() TokenType {
 	return StartTagToken
 }
-// readTag reads the next tag token. The opening "<a" or "</a" has already been
+// readTag reads the next tag token and its attributes. If saveAttr, those
-// consumed, where 'a' means anything in [A-Za-z].
+// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
-func (z *Tokenizer) readTag() {
+// The opening "<a" or "</a" has already been consumed, where 'a' means anything
+// in [A-Za-z].
+func (z *Tokenizer) readTag(saveAttr bool) {
 	z.attr = z.attr[:0]
 	z.nAttrReturned = 0
 	// Read the tag name and attribute key/value pairs.
@@ -742,8 +744,8 @@ func (z *Tokenizer) readTag() {
 		z.raw.end--
 		z.readTagAttrKey()
 		z.readTagAttrVal()
-		// Save pendingAttr if it has a non-empty key.
+		// Save pendingAttr if saveAttr and that attribute has a non-empty key.
-		if z.pendingAttr[0].start != z.pendingAttr[0].end {
+		if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
 			z.attr = append(z.attr, z.pendingAttr)
 		}
 		if z.skipWhiteSpace(); z.err != nil {
@@ -945,7 +947,7 @@ loop:
 				continue loop
 			}
 			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-				z.readTag()
+				z.readTag(false)
 				z.tt = EndTagToken
 				return z.tt
 			}
@@ -1078,16 +1080,11 @@ func (z *Tokenizer) Token() Token {
 		t.Data = string(z.Text())
 	case StartTagToken, SelfClosingTagToken, EndTagToken:
 		name, moreAttr := z.TagName()
-		// Since end tags should not have attributes, the high-level tokenizer
-		// interface will not return attributes for an end tag token even if
-		// it looks like </br foo="bar">.
-		if z.tt != EndTagToken {
 		for moreAttr {
 			var key, val []byte
 			key, val, moreAttr = z.TagAttr()
 			t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
 		}
-		}
 		if a := atom.Lookup(name); a != 0 {
 			t.DataAtom, t.Data = a, a.String()
 		} else {