Commit b4e1ca25 authored by Mike Samuel's avatar Mike Samuel

exp/template/html: allow quotes on either side of conditionals and dynamic HTML names

This addresses several use cases:

(1) <h{{.HeaderLevel}}> used to build hierarchical documents.
(2) <input on{{.EventType}}=...> used in widgets.
(3) <div {{" dir=ltr"}}> used to embed bidi-hints.

It also makes sure that we treat the two templates below the same:

<img src={{if .Avatar}}"{{.Avatar}}"{{else}}"anonymous.png"{{end}}>
<img src="{{if .Avatar}}{{.Avatar}}{{else}}anonymous.png{{end}}">

This splits up tTag into a number of sub-states and adds testcases.

R=nigeltao
CC=golang-dev
https://golang.org/cl/5043042
parent 52a46bb7
......@@ -19,11 +19,15 @@ type (
CSS string
// HTML encapsulates a known safe HTML document fragment.
// Should not be used for HTML from a third-party, or HTML with
// It should not be used for HTML from a third-party, or HTML with
// unclosed tags or comments. The outputs of a sound HTML sanitizer
// and a template escaped by this package are fine for use with HTML.
HTML string
// HTMLAttr encapsulates an HTML attribute from a trusted source,
// for example: ` dir="ltr"`.
HTMLAttr string
// JS encapsulates a known safe EcmaScript5 Expression, or example,
// `(x + y * z())`.
// Template authors are responsible for ensuring that typed expressions
......@@ -56,6 +60,7 @@ const (
contentTypePlain contentType = iota
contentTypeCSS
contentTypeHTML
contentTypeHTMLAttr
contentTypeJS
contentTypeJSStr
contentTypeURL
......@@ -71,6 +76,8 @@ func stringify(args ...interface{}) (string, contentType) {
return string(s), contentTypeCSS
case HTML:
return string(s), contentTypeHTML
case HTMLAttr:
return string(s), contentTypeHTMLAttr
case JS:
return string(s), contentTypeJS
case JSStr:
......
......@@ -16,6 +16,7 @@ func TestTypedContent(t *testing.T) {
`<b> "foo%" O'Reilly &bar;`,
CSS(`a[href =~ "//example.com"]#foo`),
HTML(`Hello, <b>World</b> &amp;tc!`),
HTMLAttr(` dir="ltr"`),
JS(`c && alert("Hello, World!");`),
JSStr(`Hello, World & O'Reilly\x21`),
URL(`greeting=H%69&addressee=(World)`),
......@@ -38,6 +39,7 @@ func TestTypedContent(t *testing.T) {
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
},
},
{
......@@ -50,6 +52,7 @@ func TestTypedContent(t *testing.T) {
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
},
},
{
......@@ -59,11 +62,25 @@ func TestTypedContent(t *testing.T) {
`a[href =~ &#34;//example.com&#34;]#foo`,
// Not escaped.
`Hello, <b>World</b> &amp;tc!`,
` dir=&#34;ltr&#34;`,
`c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
`Hello, World &amp; O&#39;Reilly\x21`,
`greeting=H%69&amp;addressee=(World)`,
},
},
{
`<a{{.}}>`,
[]string{
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
// Allowed and HTML escaped.
` dir="ltr"`,
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
},
},
{
`<a title={{.}}>`,
[]string{
......@@ -71,6 +88,7 @@ func TestTypedContent(t *testing.T) {
`a[href&#32;&#61;~&#32;&#34;//example.com&#34;]#foo`,
// Tags stripped, spaces escaped, entity not re-escaped.
`Hello,&#32;World&#32;&amp;tc!`,
`&#32;dir&#61;&#34;ltr&#34;`,
`c&#32;&amp;&amp;&#32;alert(&#34;Hello,&#32;World!&#34;);`,
`Hello,&#32;World&#32;&amp;&#32;O&#39;Reilly\x21`,
`greeting&#61;H%69&amp;addressee&#61;(World)`,
......@@ -83,6 +101,7 @@ func TestTypedContent(t *testing.T) {
`a[href =~ &#34;//example.com&#34;]#foo`,
// Tags stripped, entity not re-escaped.
`Hello, World &amp;tc!`,
` dir=&#34;ltr&#34;`,
`c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
`Hello, World &amp; O&#39;Reilly\x21`,
`greeting=H%69&amp;addressee=(World)`,
......@@ -95,6 +114,7 @@ func TestTypedContent(t *testing.T) {
`a[href =~ &#34;//example.com&#34;]#foo`,
// Angle brackets escaped to prevent injection of close tags, entity not re-escaped.
`Hello, &lt;b&gt;World&lt;/b&gt; &amp;tc!`,
` dir=&#34;ltr&#34;`,
`c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
`Hello, World &amp; O&#39;Reilly\x21`,
`greeting=H%69&amp;addressee=(World)`,
......@@ -106,6 +126,7 @@ func TestTypedContent(t *testing.T) {
`"\u003cb\u003e \"foo%\" O'Reilly &bar;"`,
`"a[href =~ \"//example.com\"]#foo"`,
`"Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;tc!"`,
`" dir=\"ltr\""`,
// Not escaped.
`c && alert("Hello, World!");`,
// Escape sequence not over-escaped.
......@@ -119,6 +140,7 @@ func TestTypedContent(t *testing.T) {
`&#34;\u003cb\u003e \&#34;foo%\&#34; O&#39;Reilly &amp;bar;&#34;`,
`&#34;a[href =~ \&#34;//example.com\&#34;]#foo&#34;`,
`&#34;Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;amp;tc!&#34;`,
`&#34; dir=\&#34;ltr\&#34;&#34;`,
// Not JS escaped but HTML escaped.
`c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
// Escape sequence not over-escaped.
......@@ -132,6 +154,7 @@ func TestTypedContent(t *testing.T) {
`\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
`a[href =~ \x22\/\/example.com\x22]#foo`,
`Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
` dir=\x22ltr\x22`,
`c \x26\x26 alert(\x22Hello, World!\x22);`,
// Escape sequence not over-escaped.
`Hello, World \x26 O\x27Reilly\x21`,
......@@ -144,6 +167,7 @@ func TestTypedContent(t *testing.T) {
`\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
`a[href =~ \x22\/\/example.com\x22]#foo`,
`Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
` dir=\x22ltr\x22`,
`c \x26\x26 alert(\x22Hello, World!\x22);`,
// Escape sequence not over-escaped.
`Hello, World \x26 O\x27Reilly\x21`,
......@@ -156,6 +180,7 @@ func TestTypedContent(t *testing.T) {
`%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
`a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
`Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
`%20dir%3d%22ltr%22`,
`c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
`Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
// Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is done.
......@@ -168,6 +193,7 @@ func TestTypedContent(t *testing.T) {
`%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
`a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
`Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
`%20dir%3d%22ltr%22`,
`c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
`Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
// Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is not done.
......
......@@ -20,6 +20,7 @@ type context struct {
delim delim
urlPart urlPart
jsCtx jsCtx
attr attr
element element
err *Error
}
......@@ -30,6 +31,7 @@ func (c context) eq(d context) bool {
c.delim == d.delim &&
c.urlPart == d.urlPart &&
c.jsCtx == d.jsCtx &&
c.attr == d.attr &&
c.element == d.element &&
c.err == d.err
}
......@@ -51,6 +53,9 @@ func (c context) mangle(templateName string) string {
if c.jsCtx != 0 {
s += "_" + c.jsCtx.String()
}
if c.attr != 0 {
s += "_" + c.attr.String()
}
if c.element != 0 {
s += "_" + c.element.String()
}
......@@ -75,6 +80,15 @@ const (
stateText state = iota
// stateTag occurs before an HTML attribute or the end of a tag.
stateTag
// stateAttrName occurs inside an attribute name.
// It occurs between the ^'s in ` ^name^ = value`.
stateAttrName
// stateAfterName occurs after an attr name has ended but before any
// equals sign. It occurs between the ^'s in ` name^ ^= value`.
stateAfterName
// stateBeforeValue occurs after the equals sign but before the value.
// It occurs between the ^'s in ` name =^ ^value`.
stateBeforeValue
// stateComment occurs inside an <!-- HTML comment -->.
stateComment
// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
......@@ -120,6 +134,9 @@ const (
var stateNames = [...]string{
stateText: "stateText",
stateTag: "stateTag",
stateAttrName: "stateAttrName",
stateAfterName: "stateAfterName",
stateBeforeValue: "stateBeforeValue",
stateComment: "stateComment",
stateRCDATA: "stateRCDATA",
stateAttr: "stateAttr",
......@@ -145,7 +162,7 @@ func (s state) String() string {
if int(s) < len(stateNames) {
return stateNames[s]
}
return fmt.Sprintf("illegal state %d", s)
return fmt.Sprintf("illegal state %d", int(s))
}
// delim is the delimiter that will end the current HTML attribute.
......@@ -174,7 +191,7 @@ func (d delim) String() string {
if int(d) < len(delimNames) {
return delimNames[d]
}
return fmt.Sprintf("illegal delim %d", d)
return fmt.Sprintf("illegal delim %d", int(d))
}
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
......@@ -207,7 +224,7 @@ func (u urlPart) String() string {
if int(u) < len(urlPartNames) {
return urlPartNames[u]
}
return fmt.Sprintf("illegal urlPart %d", u)
return fmt.Sprintf("illegal urlPart %d", int(u))
}
// jsCtx determines whether a '/' starts a regular expression literal or a
......@@ -232,7 +249,7 @@ func (c jsCtx) String() string {
case jsCtxUnknown:
return "jsCtxUnknown"
}
return fmt.Sprintf("illegal jsCtx %d", c)
return fmt.Sprintf("illegal jsCtx %d", int(c))
}
// element identifies the HTML element when inside a start tag or special body.
......@@ -267,5 +284,33 @@ func (e element) String() string {
if int(e) < len(elementNames) {
return elementNames[e]
}
return fmt.Sprintf("illegal element %d", e)
return fmt.Sprintf("illegal element %d", int(e))
}
// attr identifies the most recent HTML attribute when inside a start tag.
type attr uint8
const (
// attrNone corresponds to a normal attribute or no attribute.
attrNone attr = iota
// attrScript corresponds to an event handler attribute.
attrScript
// attrStyle corresponds to the style attribute whose value is CSS.
attrStyle
// attrURL corresponds to an attribute whose value is a URL.
attrURL
)
var attrNames = [...]string{
attrNone: "attrNone",
attrScript: "attrScript",
attrStyle: "attrStyle",
attrURL: "attrURL",
}
func (a attr) String() string {
if int(a) < len(attrNames) {
return attrNames[a]
}
return fmt.Sprintf("illegal attr %d", int(a))
}
......@@ -66,6 +66,7 @@ var funcMap = template.FuncMap{
"exp_template_html_attrescaper": attrEscaper,
"exp_template_html_cssescaper": cssEscaper,
"exp_template_html_cssvaluefilter": cssValueFilter,
"exp_template_html_htmlnamefilter": htmlNameFilter,
"exp_template_html_htmlescaper": htmlEscaper,
"exp_template_html_jsregexpescaper": jsRegexpEscaper,
"exp_template_html_jsstrescaper": jsStrEscaper,
......@@ -151,8 +152,11 @@ func (e *escaper) escape(c context, n parse.Node) context {
// escapeAction escapes an action template node.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
c = nudge(c)
s := make([]string, 0, 3)
switch c.state {
case stateError:
return c
case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
switch c.urlPart {
case urlPartNone:
......@@ -194,6 +198,13 @@ func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
s = append(s, "exp_template_html_htmlescaper")
case stateRCDATA:
s = append(s, "exp_template_html_rcdataescaper")
case stateAttr:
// Handled below in delim check.
case stateAttrName, stateTag:
c.state = stateAttrName
s = append(s, "exp_template_html_htmlnamefilter")
default:
panic("unexpected state " + c.state.String())
}
switch c.delim {
case delimNone:
......@@ -289,6 +300,33 @@ func newIdentCmd(identifier string) *parse.CommandNode {
}
}
// nudge returns the context that would result from following empty string
// transitions from the input context.
// For example, parsing:
// `<a href=`
// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
// `<a href=x`
// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
// There are two transitions that happen when the 'x' is seen:
// (1) Transition from a before-value state to a start-of-value state without
// consuming any character.
// (2) Consume 'x' and transition past the first value character.
// In this case, nudging produces the context after (1) happens.
func nudge(c context) context {
switch c.state {
case stateTag:
// In `<foo {{.}}`, the action should emit an attribute.
c.state = stateAttrName
case stateBeforeValue:
// In `<foo bar={{.}}`, the action is an undelimited value.
c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
case stateAfterName:
// In `<foo bar {{.}}`, the action is an attribute name.
c.state, c.attr = stateAttrName, attrNone
}
return c
}
// join joins the two contexts of a branch template node. The result is an
// error context if either of the input contexts are error contexts, or if the
// the input contexts differ.
......@@ -319,6 +357,17 @@ func join(a, b context, line int, nodeName string) context {
return c
}
// Allow a nudged context to join with an unnudged one.
// This means that
// <p title={{if .C}}{{.}}{{end}}
// ends in an unquoted value state even though the else branch
// ends in stateBeforeValue.
if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
if e := join(c, d, line, nodeName); e.state != stateError {
return e
}
}
return context{
state: stateError,
err: errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
......
......@@ -411,6 +411,51 @@ func TestEscape(t *testing.T) {
`<textarea><{{"/textarea "}}...</textarea>`,
`<textarea>&lt;/textarea ...</textarea>`,
},
{
"optional attrs",
`<img class="{{"iconClass"}}"` +
`{{if .T}} id="{{"<iconId>"}}"{{end}}` +
// Double quotes inside if/else.
` src=` +
`{{if .T}}"?{{"<iconPath>"}}"` +
`{{else}}"images/cleardot.gif"{{end}}` +
// Missing space before title, but it is not a
// part of the src attribute.
`{{if .T}}title="{{"<title>"}}"{{end}}` +
// Quotes outside if/else.
` alt="` +
`{{if .T}}{{"<alt>"}}` +
`{{else}}{{if .F}}{{"<title>"}}{{end}}` +
`{{end}}"` +
`>`,
`<img class="iconClass" id="&lt;iconId&gt;" src="?%3ciconPath%3e"title="&lt;title&gt;" alt="&lt;alt&gt;">`,
},
{
"conditional valueless attr name",
`<input{{if .T}} checked{{end}} name=n>`,
`<input checked name=n>`,
},
{
"conditional dynamic valueless attr name 1",
`<input{{if .T}} {{"checked"}}{{end}} name=n>`,
`<input checked name=n>`,
},
{
"conditional dynamic valueless attr name 2",
`<input {{if .T}}{{"checked"}} {{end}}name=n>`,
`<input checked name=n>`,
},
{
"dynamic attribute name",
`<img on{{"load"}}="alert({{"loaded"}})">`,
// Treated as JS since quotes are inserted.
`<img onload="alert(&#34;loaded&#34;)">`,
},
{
"dynamic element name",
`<h{{3}}><table><t{{"head"}}>...</h{{3}}>`,
`<h3><table><thead>...</h3>`,
},
}
for _, test := range tests {
......@@ -780,9 +825,25 @@ func TestEscapeText(t *testing.T) {
`<a>`,
context{state: stateText},
},
{
`<a href`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a on`,
context{state: stateAttrName, attr: attrScript},
},
{
`<a href `,
context{state: stateAfterName, attr: attrURL},
},
{
`<a style = `,
context{state: stateBeforeValue, attr: attrStyle},
},
{
`<a href=`,
context{state: stateURL, delim: delimSpaceOrTagEnd},
context{state: stateBeforeValue, attr: attrURL},
},
{
`<a href=x`,
......
......@@ -205,3 +205,22 @@ func stripTags(html string) string {
}
return b.String()
}
// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
// a known-safe HTML attribute.
func htmlNameFilter(args ...interface{}) string {
s, t := stringify(args...)
if t == contentTypeHTMLAttr {
return s
}
for _, r := range s {
switch {
case '0' <= r && r <= '9':
case 'A' <= r && r <= 'Z':
case 'a' <= r && r <= 'z':
default:
return filterFailsafe
}
}
return s
}
......@@ -18,6 +18,9 @@ import (
var transitionFunc = [...]func(context, []byte) (context, []byte){
stateText: tText,
stateTag: tTag,
stateAttrName: tAttrName,
stateAfterName: tAfterName,
stateBeforeValue: tBeforeValue,
stateComment: tComment,
stateRCDATA: tSpecialTagEnd,
stateAttr: tAttr,
......@@ -79,54 +82,90 @@ var elementContentType = [...]state{
// tTag is the context transition function for the tag state.
func tTag(c context, s []byte) (context, []byte) {
// Find the attribute name.
attrStart := eatWhiteSpace(s, 0)
i, err := eatAttrName(s, attrStart)
if err != nil {
return context{
state: stateError,
err: err,
}, nil
}
i := eatWhiteSpace(s, 0)
if i == len(s) {
return c, nil
}
state := stateAttr
canonAttrName := strings.ToLower(string(s[attrStart:i]))
if urlAttr[canonAttrName] {
state = stateURL
} else if strings.HasPrefix(canonAttrName, "on") {
state = stateJS
} else if canonAttrName == "style" {
state = stateCSS
if s[i] == '>' {
return context{
state: elementContentType[c.element],
element: c.element,
}, s[i+1:]
}
j, err := eatAttrName(s, i)
if err != nil {
return context{state: stateError, err: err}, nil
}
state, attr := stateTag, attrNone
if i != j {
canonAttrName := strings.ToLower(string(s[i:j]))
if urlAttr[canonAttrName] {
attr = attrURL
} else if strings.HasPrefix(canonAttrName, "on") {
attr = attrScript
} else if canonAttrName == "style" {
attr = attrStyle
}
if j == len(s) {
state = stateAttrName
} else {
state = stateAfterName
}
}
return context{state: state, element: c.element, attr: attr}, s[j:]
}
// tAttrName is the context transition function for stateAttrName.
func tAttrName(c context, s []byte) (context, []byte) {
i, err := eatAttrName(s, 0)
if err != nil {
return context{state: stateError, err: err}, nil
} else if i == len(s) {
return c, nil
}
c.state = stateAfterName
return c, s[i:]
}
// tAfterName is the context transition function for stateAfterName.
func tAfterName(c context, s []byte) (context, []byte) {
// Look for the start of the value.
i = eatWhiteSpace(s, i)
i := eatWhiteSpace(s, 0)
if i == len(s) {
return c, s[i:]
}
if s[i] == '>' {
state = elementContentType[c.element]
return context{state: state, element: c.element}, s[i+1:]
return c, nil
} else if s[i] != '=' {
// Possible due to a valueless attribute or '/' in "<input />".
// Occurs due to tag ending '>', and valueless attribute.
c.state = stateTag
return c, s[i:]
}
c.state = stateBeforeValue
// Consume the "=".
i = eatWhiteSpace(s, i+1)
return c, s[i+1:]
}
var attrStartStates = [...]state{
attrNone: stateAttr,
attrScript: stateJS,
attrStyle: stateCSS,
attrURL: stateURL,
}
// tBeforeValue is the context transition function for stateBeforeValue.
func tBeforeValue(c context, s []byte) (context, []byte) {
i := eatWhiteSpace(s, 0)
if i == len(s) {
return c, nil
}
// Find the attribute delimiter.
delim := delimSpaceOrTagEnd
if i < len(s) {
switch s[i] {
case '\'':
delim, i = delimSingleQuote, i+1
case '"':
delim, i = delimDoubleQuote, i+1
}
switch s[i] {
case '\'':
delim, i = delimSingleQuote, i+1
case '"':
delim, i = delimDoubleQuote, i+1
}
return context{state: state, delim: delim, element: c.element}, s[i:]
c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
return c, s[i:]
}
// tComment is the context transition function for stateComment.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment