Commit f979528c authored by Andrew Balholm's avatar Andrew Balholm Committed by Nigel Tao

exp/html: special handling for entities in attributes

Don't unescape entities in attributes when they don't end with
a semicolon and they are followed by '=', a letter, or a digit.

Pass 6 more tests from the WebKit test suite, plus one that was
commented out in token_test.go.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6405073
parent 4087c1b8
......@@ -163,14 +163,15 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
func unescape(b []byte) []byte {
// attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
for i, c := range b {
if c == '&' {
dst, src := unescapeEntity(b, i, i, false)
dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) {
c := b[src]
if c == '&' {
dst, src = unescapeEntity(b, dst, src, false)
dst, src = unescapeEntity(b, dst, src, attribute)
} else {
b[dst] = c
dst, src = dst+1, src+1
......@@ -250,7 +251,7 @@ func EscapeString(s string) string {
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
return string(unescape([]byte(s)))
return string(unescape([]byte(s), false))
}
}
return s
......
......@@ -2,11 +2,11 @@ PASS "<div bar=\"ZZ&gt;YY\"></div>"
PASS "<div bar=\"ZZ&\"></div>"
PASS "<div bar='ZZ&'></div>"
PASS "<div bar=ZZ&></div>"
FAIL "<div bar=\"ZZ&gt=YY\"></div>"
FAIL "<div bar=\"ZZ&gt0YY\"></div>"
FAIL "<div bar=\"ZZ&gt9YY\"></div>"
FAIL "<div bar=\"ZZ&gtaYY\"></div>"
FAIL "<div bar=\"ZZ&gtZYY\"></div>"
PASS "<div bar=\"ZZ&gt=YY\"></div>"
PASS "<div bar=\"ZZ&gt0YY\"></div>"
PASS "<div bar=\"ZZ&gt9YY\"></div>"
PASS "<div bar=\"ZZ&gtaYY\"></div>"
PASS "<div bar=\"ZZ&gtZYY\"></div>"
PASS "<div bar=\"ZZ&gt YY\"></div>"
PASS "<div bar=\"ZZ&gt\"></div>"
PASS "<div bar='ZZ&gt'></div>"
......@@ -15,7 +15,7 @@ PASS "<div bar=\"ZZ&pound_id=23\"></div>"
PASS "<div bar=\"ZZ&prod_id=23\"></div>"
PASS "<div bar=\"ZZ&pound;_id=23\"></div>"
PASS "<div bar=\"ZZ&prod;_id=23\"></div>"
FAIL "<div bar=\"ZZ&pound=23\"></div>"
PASS "<div bar=\"ZZ&pound=23\"></div>"
PASS "<div bar=\"ZZ&prod=23\"></div>"
PASS "<div>ZZ&pound_id=23</div>"
PASS "<div>ZZ&prod_id=23</div>"
......
......@@ -741,7 +741,7 @@ func (z *Tokenizer) Text() []byte {
z.data.end = z.raw.end
s = convertNewlines(s)
if !z.textIsRaw {
s = unescape(s)
s = unescape(s, false)
}
return s
}
......@@ -775,7 +775,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
z.nAttrReturned++
key = z.buf[x[0].start:x[0].end]
val = z.buf[x[1].start:x[1].end]
return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
return lower(key), unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
}
}
return nil, nil, false
......
......@@ -370,14 +370,11 @@ var tokenTests = []tokenTest{
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
},
/*
// TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
{
"entity without semicolon",
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
},
*/
{
"entity with digits",
"&frac12;",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment