Commit f979528c authored by Andrew Balholm, committed by Nigel Tao

exp/html: special handling for entities in attributes

Don't unescape entities in attributes when they don't end with
a semicolon and they are followed by '=', a letter, or a digit.

Pass 6 more tests from the WebKit test suite, plus one that was
commented out in token_test.go.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6405073
parent 4087c1b8
...@@ -163,14 +163,15 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { ...@@ -163,14 +163,15 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
} }
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b". // unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
func unescape(b []byte) []byte { // attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
for i, c := range b { for i, c := range b {
if c == '&' { if c == '&' {
dst, src := unescapeEntity(b, i, i, false) dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) { for src < len(b) {
c := b[src] c := b[src]
if c == '&' { if c == '&' {
dst, src = unescapeEntity(b, dst, src, false) dst, src = unescapeEntity(b, dst, src, attribute)
} else { } else {
b[dst] = c b[dst] = c
dst, src = dst+1, src+1 dst, src = dst+1, src+1
...@@ -250,7 +251,7 @@ func EscapeString(s string) string { ...@@ -250,7 +251,7 @@ func EscapeString(s string) string {
func UnescapeString(s string) string { func UnescapeString(s string) string {
for _, c := range s { for _, c := range s {
if c == '&' { if c == '&' {
return string(unescape([]byte(s))) return string(unescape([]byte(s), false))
} }
} }
return s return s
......
...@@ -2,11 +2,11 @@ PASS "<div bar=\"ZZ&gt;YY\"></div>" ...@@ -2,11 +2,11 @@ PASS "<div bar=\"ZZ&gt;YY\"></div>"
PASS "<div bar=\"ZZ&\"></div>" PASS "<div bar=\"ZZ&\"></div>"
PASS "<div bar='ZZ&'></div>" PASS "<div bar='ZZ&'></div>"
PASS "<div bar=ZZ&></div>" PASS "<div bar=ZZ&></div>"
FAIL "<div bar=\"ZZ&gt=YY\"></div>" PASS "<div bar=\"ZZ&gt=YY\"></div>"
FAIL "<div bar=\"ZZ&gt0YY\"></div>" PASS "<div bar=\"ZZ&gt0YY\"></div>"
FAIL "<div bar=\"ZZ&gt9YY\"></div>" PASS "<div bar=\"ZZ&gt9YY\"></div>"
FAIL "<div bar=\"ZZ&gtaYY\"></div>" PASS "<div bar=\"ZZ&gtaYY\"></div>"
FAIL "<div bar=\"ZZ&gtZYY\"></div>" PASS "<div bar=\"ZZ&gtZYY\"></div>"
PASS "<div bar=\"ZZ&gt YY\"></div>" PASS "<div bar=\"ZZ&gt YY\"></div>"
PASS "<div bar=\"ZZ&gt\"></div>" PASS "<div bar=\"ZZ&gt\"></div>"
PASS "<div bar='ZZ&gt'></div>" PASS "<div bar='ZZ&gt'></div>"
...@@ -15,7 +15,7 @@ PASS "<div bar=\"ZZ&pound_id=23\"></div>" ...@@ -15,7 +15,7 @@ PASS "<div bar=\"ZZ&pound_id=23\"></div>"
PASS "<div bar=\"ZZ&prod_id=23\"></div>" PASS "<div bar=\"ZZ&prod_id=23\"></div>"
PASS "<div bar=\"ZZ&pound;_id=23\"></div>" PASS "<div bar=\"ZZ&pound;_id=23\"></div>"
PASS "<div bar=\"ZZ&prod;_id=23\"></div>" PASS "<div bar=\"ZZ&prod;_id=23\"></div>"
FAIL "<div bar=\"ZZ&pound=23\"></div>" PASS "<div bar=\"ZZ&pound=23\"></div>"
PASS "<div bar=\"ZZ&prod=23\"></div>" PASS "<div bar=\"ZZ&prod=23\"></div>"
PASS "<div>ZZ&pound_id=23</div>" PASS "<div>ZZ&pound_id=23</div>"
PASS "<div>ZZ&prod_id=23</div>" PASS "<div>ZZ&prod_id=23</div>"
......
...@@ -741,7 +741,7 @@ func (z *Tokenizer) Text() []byte { ...@@ -741,7 +741,7 @@ func (z *Tokenizer) Text() []byte {
z.data.end = z.raw.end z.data.end = z.raw.end
s = convertNewlines(s) s = convertNewlines(s)
if !z.textIsRaw { if !z.textIsRaw {
s = unescape(s) s = unescape(s, false)
} }
return s return s
} }
...@@ -775,7 +775,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) { ...@@ -775,7 +775,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
z.nAttrReturned++ z.nAttrReturned++
key = z.buf[x[0].start:x[0].end] key = z.buf[x[0].start:x[0].end]
val = z.buf[x[1].start:x[1].end] val = z.buf[x[1].start:x[1].end]
return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr) return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
} }
} }
return nil, nil, false return nil, nil, false
......
...@@ -370,14 +370,11 @@ var tokenTests = []tokenTest{ ...@@ -370,14 +370,11 @@ var tokenTests = []tokenTest{
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`, `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`, `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
}, },
/*
// TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
{ {
"entity without semicolon", "entity without semicolon",
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`, `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`, `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
}, },
*/
{ {
"entity with digits", "entity with digits",
"&frac12;", "&frac12;",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment