exp/template/html: move transition functions to a separate file

This CL moves code but makes no changes otherwise. R=nigeltao, r CC=golang-dev https://golang.org/cl/5012045

exp/template/html: move transition functions to a separate file
This CL moves code but makes no changes otherwise. R=nigeltao, r CC=golang-dev https://golang.org/cl/5012045
15d47ce2 · Mike Samuel · 4e3b725c · 15d47ce2 · 15d47ce2 · 15d47ce2
Commit 15d47ce2 authored Sep 13, 2011 by Mike Samuel
3 changed files
--- a/src/pkg/exp/template/html/Makefile
+++ b/src/pkg/exp/template/html/Makefile
@@ -12,6 +12,7 @@ GOFILES=\
 	escape.go\
 	html.go\
 	js.go\
+	transition.go\
 	url.go\

 include ../../../../Make.pkg
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@@ -12,7 +12,6 @@ import (
 	"fmt"
 	"html"
 	"os"
-	"strings"
 	"template"
 	"template/parse"
 )
@@ -471,555 +470,3 @@ func (e *escaper) template(name string) *template.Template {
 	}
 	return t
 }
-
-// transitionFunc is the array of context transition functions for text nodes.
-// A transition function takes a context and template text input, and returns
-// the updated context and any unconsumed text.
-var transitionFunc = [...]func(context, []byte) (context, []byte){
-	stateText:        tText,
-	stateTag:         tTag,
-	stateComment:     tComment,
-	stateRCDATA:      tSpecialTagEnd,
-	stateAttr:        tAttr,
-	stateURL:         tURL,
-	stateJS:          tJS,
-	stateJSDqStr:     tJSStr,
-	stateJSSqStr:     tJSStr,
-	stateJSRegexp:    tJSRegexp,
-	stateJSBlockCmt:  tBlockCmt,
-	stateJSLineCmt:   tLineCmt,
-	stateCSS:         tCSS,
-	stateCSSDqStr:    tCSSStr,
-	stateCSSSqStr:    tCSSStr,
-	stateCSSDqURL:    tCSSStr,
-	stateCSSSqURL:    tCSSStr,
-	stateCSSURL:      tCSSStr,
-	stateCSSBlockCmt: tBlockCmt,
-	stateCSSLineCmt:  tLineCmt,
-	stateError:       tError,
-}
-
-var commentStart = []byte("<!--")
-var commentEnd = []byte("-->")
-
-// tText is the context transition function for the text state.
-func tText(c context, s []byte) (context, []byte) {
-	for {
-		i := bytes.IndexByte(s, '<')
-		if i == -1 || i+1 == len(s) {
-			return c, nil
-		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
-			return context{state: stateComment}, s[i+4:]
-		}
-		i++
-		if s[i] == '/' {
-			if i+1 == len(s) {
-				return c, nil
-			}
-			i++
-		}
-		j, e := eatTagName(s, i)
-		if j != i {
-			// We've found an HTML tag.
-			return context{state: stateTag, element: e}, s[j:]
-		}
-		s = s[j:]
-	}
-	panic("unreachable")
-}
-
-var elementContentType = [...]state{
-	elementNone:     stateText,
-	elementScript:   stateJS,
-	elementStyle:    stateCSS,
-	elementTextarea: stateRCDATA,
-	elementTitle:    stateRCDATA,
-}
-
-// tTag is the context transition function for the tag state.
-func tTag(c context, s []byte) (context, []byte) {
-	// Find the attribute name.
-	attrStart := eatWhiteSpace(s, 0)
-	i, err := eatAttrName(s, attrStart)
-	if err != nil {
-		return context{
-			state:  stateError,
-			errStr: err.String(),
-		}, nil
-	}
-	if i == len(s) {
-		return c, nil
-	}
-	state := stateAttr
-	canonAttrName := strings.ToLower(string(s[attrStart:i]))
-	if urlAttr[canonAttrName] {
-		state = stateURL
-	} else if strings.HasPrefix(canonAttrName, "on") {
-		state = stateJS
-	} else if canonAttrName == "style" {
-		state = stateCSS
-	}
-
-	// Look for the start of the value.
-	i = eatWhiteSpace(s, i)
-	if i == len(s) {
-		return c, s[i:]
-	}
-	if s[i] == '>' {
-		state = elementContentType[c.element]
-		return context{state: state, element: c.element}, s[i+1:]
-	} else if s[i] != '=' {
-		// Possible due to a valueless attribute or '/' in "<input />".
-		return c, s[i:]
-	}
-	// Consume the "=".
-	i = eatWhiteSpace(s, i+1)
-
-	// Find the attribute delimiter.
-	delim := delimSpaceOrTagEnd
-	if i < len(s) {
-		switch s[i] {
-		case '\'':
-			delim, i = delimSingleQuote, i+1
-		case '"':
-			delim, i = delimDoubleQuote, i+1
-		}
-	}
-
-	return context{state: state, delim: delim, element: c.element}, s[i:]
-}
-
-// tComment is the context transition function for stateComment.
-func tComment(c context, s []byte) (context, []byte) {
-	i := bytes.Index(s, commentEnd)
-	if i != -1 {
-		return context{}, s[i+3:]
-	}
-	return c, nil
-}
-
-// specialTagEndMarkers maps element types to the character sequence that
-// case-insensitively signals the end of the special tag body.
-var specialTagEndMarkers = [...]string{
-	elementScript:   "</script",
-	elementStyle:    "</style",
-	elementTextarea: "</textarea",
-	elementTitle:    "</title",
-}
-
-// tSpecialTagEnd is the context transition function for raw text and RCDATA
-// element states.
-func tSpecialTagEnd(c context, s []byte) (context, []byte) {
-	if c.element != elementNone {
-		end := specialTagEndMarkers[c.element]
-		i := strings.Index(strings.ToLower(string(s)), end)
-		if i != -1 {
-			return context{state: stateTag}, s[i+len(end):]
-		}
-	}
-	return c, nil
-}
-
-// tAttr is the context transition function for the attribute state.
-func tAttr(c context, s []byte) (context, []byte) {
-	return c, nil
-}
-
-// tURL is the context transition function for the URL state.
-func tURL(c context, s []byte) (context, []byte) {
-	if bytes.IndexAny(s, "#?") >= 0 {
-		c.urlPart = urlPartQueryOrFrag
-	} else if len(s) != 0 && c.urlPart == urlPartNone {
-		c.urlPart = urlPartPreQuery
-	}
-	return c, nil
-}
-
-// tJS is the context transition function for the JS state.
-func tJS(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-
-	i := bytes.IndexAny(s, `"'/`)
-	if i == -1 {
-		// Entire input is non string, comment, regexp tokens.
-		c.jsCtx = nextJSCtx(s, c.jsCtx)
-		return c, nil
-	}
-	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
-	switch s[i] {
-	case '"':
-		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
-	case '\'':
-		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
-	case '/':
-		switch {
-		case i+1 < len(s) && s[i+1] == '/':
-			c.state, i = stateJSLineCmt, i+1
-		case i+1 < len(s) && s[i+1] == '*':
-			c.state, i = stateJSBlockCmt, i+1
-		case c.jsCtx == jsCtxRegexp:
-			c.state = stateJSRegexp
-		case c.jsCtx == jsCtxDivOp:
-			c.jsCtx = jsCtxRegexp
-		default:
-			return context{
-				state:  stateError,
-				errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
-			}, nil
-		}
-	default:
-		panic("unreachable")
-	}
-	return c, s[i+1:]
-}
-
-// tJSStr is the context transition function for the JS string states.
-func tJSStr(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-
-	quoteAndEsc := `\"`
-	if c.state == stateJSSqStr {
-		quoteAndEsc = `\'`
-	}
-
-	b := s
-	for {
-		i := bytes.IndexAny(b, quoteAndEsc)
-		if i == -1 {
-			return c, nil
-		}
-		if b[i] == '\\' {
-			i++
-			if i == len(b) {
-				return context{
-					state:  stateError,
-					errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
-				}, nil
-			}
-		} else {
-			c.state, c.jsCtx = stateJS, jsCtxDivOp
-			return c, b[i+1:]
-		}
-		b = b[i+1:]
-	}
-	panic("unreachable")
-}
-
-// tJSRegexp is the context transition function for the /RegExp/ literal state.
-func tJSRegexp(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-
-	b := s
-	inCharset := false
-	for {
-		i := bytes.IndexAny(b, `/[\]`)
-		if i == -1 {
-			break
-		}
-		switch b[i] {
-		case '/':
-			if !inCharset {
-				c.state, c.jsCtx = stateJS, jsCtxDivOp
-				return c, b[i+1:]
-			}
-		case '\\':
-			i++
-			if i == len(b) {
-				return context{
-					state:  stateError,
-					errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
-				}, nil
-			}
-		case '[':
-			inCharset = true
-		case ']':
-			inCharset = false
-		default:
-			panic("unreachable")
-		}
-		b = b[i+1:]
-	}
-
-	if inCharset {
-		// This can be fixed by making context richer if interpolation
-		// into charsets is desired.
-		return context{
-			state:  stateError,
-			errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
-		}, nil
-	}
-
-	return c, nil
-}
-
-var blockCommentEnd = []byte("*/")
-
-// tBlockCmt is the context transition function for /*comment*/ states.
-func tBlockCmt(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-	i := bytes.Index(s, blockCommentEnd)
-	if i == -1 {
-		return c, nil
-	}
-	switch c.state {
-	case stateJSBlockCmt:
-		c.state = stateJS
-	case stateCSSBlockCmt:
-		c.state = stateCSS
-	default:
-		panic(c.state.String())
-	}
-	return c, s[i+2:]
-}
-
-// tLineCmt is the context transition function for //comment states.
-func tLineCmt(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-	var lineTerminators string
-	var endState state
-	switch c.state {
-	case stateJSLineCmt:
-		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
-	case stateCSSLineCmt:
-		lineTerminators, endState = "\n\f\r", stateCSS
-		// Line comments are not part of any published CSS standard but
-		// are supported by the 4 major browsers.
-		// This defines line comments as
-		//     LINECOMMENT ::= "//" [^\n\f\d]*
-		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
-		// newlines:
-		//     nl ::= #xA | #xD #xA | #xD | #xC
-	default:
-		panic(c.state.String())
-	}
-
-	i := bytes.IndexAny(s, lineTerminators)
-	if i == -1 {
-		return c, nil
-	}
-	c.state = endState
-	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
-	// "However, the LineTerminator at the end of the line is not
-	// considered to be part of the single-line comment; it is recognised
-	// separately by the lexical grammar and becomes part of the stream of
-	// input elements for the syntactic grammar."
-	return c, s[i:]
-}
-
-// tCSS is the context transition function for the CSS state.
-func tCSS(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-
-	// CSS quoted strings are almost never used except for:
-	// (1) URLs as in background: "/foo.png"
-	// (2) Multiword font-names as in font-family: "Times New Roman"
-	// (3) List separators in content values as in inline-lists:
-	//    <style>
-	//    ul.inlineList { list-style: none; padding:0 }
-	//    ul.inlineList > li { display: inline }
-	//    ul.inlineList > li:before { content: ", " }
-	//    ul.inlineList > li:first-child:before { content: "" }
-	//    </style>
-	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
-	// (4) Attribute value selectors as in a[href="http://example.com/"]
-	//
-	// We conservatively treat all strings as URLs, but make some
-	// allowances to avoid confusion.
-	//
-	// In (1), our conservative assumption is justified.
-	// In (2), valid font names do not contain ':', '?', or '#', so our
-	// conservative assumption is fine since we will never transition past
-	// urlPartPreQuery.
-	// In (3), our protocol heuristic should not be tripped, and there
-	// should not be non-space content after a '?' or '#', so as long as
-	// we only %-encode RFC 3986 reserved characters we are ok.
-	// In (4), we should URL escape for URL attributes, and for others we
-	// have the attribute name available if our conservative assumption
-	// proves problematic for real code.
-
-	for {
-		i := bytes.IndexAny(s, `("'/`)
-		if i == -1 {
-			return c, nil
-		}
-		switch s[i] {
-		case '(':
-			// Look for url to the left.
-			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
-			if endsWithCSSKeyword(p, "url") {
-				q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
-				switch {
-				case len(q) != 0 && q[0] == '"':
-					c.state, s = stateCSSDqURL, q[1:]
-				case len(q) != 0 && q[0] == '\'':
-					c.state, s = stateCSSSqURL, q[1:]
-
-				default:
-					c.state, s = stateCSSURL, q
-				}
-				return c, s
-			}
-		case '/':
-			if i+1 < len(s) {
-				switch s[i+1] {
-				case '/':
-					c.state = stateCSSLineCmt
-					return c, s[i+2:]
-				case '*':
-					c.state = stateCSSBlockCmt
-					return c, s[i+2:]
-				}
-			}
-		case '"':
-			c.state = stateCSSDqStr
-			return c, s[i+1:]
-		case '\'':
-			c.state = stateCSSSqStr
-			return c, s[i+1:]
-		}
-		s = s[i+1:]
-	}
-	panic("unreachable")
-}
-
-// tCSSStr is the context transition function for the CSS string and URL states.
-func tCSSStr(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
-	}
-
-	var endAndEsc string
-	switch c.state {
-	case stateCSSDqStr, stateCSSDqURL:
-		endAndEsc = `\"`
-	case stateCSSSqStr, stateCSSSqURL:
-		endAndEsc = `\'`
-	case stateCSSURL:
-		// Unquoted URLs end with a newline or close parenthesis.
-		// The below includes the wc (whitespace character) and nl.
-		endAndEsc = "\\\t\n\f\r )"
-	default:
-		panic(c.state.String())
-	}
-
-	b := s
-	for {
-		i := bytes.IndexAny(b, endAndEsc)
-		if i == -1 {
-			return tURL(c, decodeCSS(b))
-		}
-		if b[i] == '\\' {
-			i++
-			if i == len(b) {
-				return context{
-					state:  stateError,
-					errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
-				}, nil
-			}
-		} else {
-			c.state = stateCSS
-			return c, b[i+1:]
-		}
-		c, _ = tURL(c, decodeCSS(b[:i+1]))
-		b = b[i+1:]
-	}
-	panic("unreachable")
-}
-
-// tError is the context transition function for the error state.
-func tError(c context, s []byte) (context, []byte) {
-	return c, nil
-}
-
-// eatAttrName returns the largest j such that s[i:j] is an attribute name.
-// It returns an error if s[i:] does not look like it begins with an
-// attribute name, such as encountering a quote mark without a preceding
-// equals sign.
-func eatAttrName(s []byte, i int) (int, os.Error) {
-	for j := i; j < len(s); j++ {
-		switch s[j] {
-		case ' ', '\t', '\n', '\f', '\r', '=', '>':
-			return j, nil
-		case '\'', '"', '<':
-			// These result in a parse warning in HTML5 and are
-			// indicative of serious problems if seen in an attr
-			// name in a template.
-			return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
-		default:
-			// No-op.
-		}
-	}
-	return len(s), nil
-}
-
-var elementNameMap = map[string]element{
-	"script":   elementScript,
-	"style":    elementStyle,
-	"textarea": elementTextarea,
-	"title":    elementTitle,
-}
-
-// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
-func eatTagName(s []byte, i int) (int, element) {
-	j := i
-	for ; j < len(s); j++ {
-		x := s[j]
-		if !(('a' <= x && x <= 'z') ||
-			('A' <= x && x <= 'Z') ||
-			('0' <= x && x <= '9' && i != j)) {
-			break
-		}
-	}
-	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
-}
-
-// eatWhiteSpace returns the largest j such that s[i:j] is white space.
-func eatWhiteSpace(s []byte, i int) int {
-	for j := i; j < len(s); j++ {
-		switch s[j] {
-		case ' ', '\t', '\n', '\f', '\r':
-			// No-op.
-		default:
-			return j
-		}
-	}
-	return len(s)
-}
-
-// urlAttr is the set of attribute names whose values are URLs.
-// It consists of all "%URI"-typed attributes from
-// http://www.w3.org/TR/html4/index/attributes.html
-// as well as those attributes defined at
-// http://dev.w3.org/html5/spec/index.html#attributes-1
-// whose Value column in that table matches
-// "Valid [non-empty] URL potentially surrounded by spaces".
-var urlAttr = map[string]bool{
-	"action":     true,
-	"archive":    true,
-	"background": true,
-	"cite":       true,
-	"classid":    true,
-	"codebase":   true,
-	"data":       true,
-	"formaction": true,
-	"href":       true,
-	"icon":       true,
-	"longdesc":   true,
-	"manifest":   true,
-	"poster":     true,
-	"profile":    true,
-	"src":        true,
-	"usemap":     true,
-}
--- a/src/pkg/exp/template/html/transition.go
+++ b/src/pkg/exp/template/html/transition.go
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and any unconsumed text.
+var transitionFunc = [...]func(context, []byte) (context, []byte){
+	stateText:        tText,
+	stateTag:         tTag,
+	stateComment:     tComment,
+	stateRCDATA:      tSpecialTagEnd,
+	stateAttr:        tAttr,
+	stateURL:         tURL,
+	stateJS:          tJS,
+	stateJSDqStr:     tJSStr,
+	stateJSSqStr:     tJSStr,
+	stateJSRegexp:    tJSRegexp,
+	stateJSBlockCmt:  tBlockCmt,
+	stateJSLineCmt:   tLineCmt,
+	stateCSS:         tCSS,
+	stateCSSDqStr:    tCSSStr,
+	stateCSSSqStr:    tCSSStr,
+	stateCSSDqURL:    tCSSStr,
+	stateCSSSqURL:    tCSSStr,
+	stateCSSURL:      tCSSStr,
+	stateCSSBlockCmt: tBlockCmt,
+	stateCSSLineCmt:  tLineCmt,
+	stateError:       tError,
+}
+
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+func tText(c context, s []byte) (context, []byte) {
+	for {
+		i := bytes.IndexByte(s, '<')
+		if i == -1 || i+1 == len(s) {
+			return c, nil
+		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+			return context{state: stateComment}, s[i+4:]
+		}
+		i++
+		if s[i] == '/' {
+			if i+1 == len(s) {
+				return c, nil
+			}
+			i++
+		}
+		j, e := eatTagName(s, i)
+		if j != i {
+			// We've found an HTML tag.
+			return context{state: stateTag, element: e}, s[j:]
+		}
+		s = s[j:]
+	}
+	panic("unreachable")
+}
+
+var elementContentType = [...]state{
+	elementNone:     stateText,
+	elementScript:   stateJS,
+	elementStyle:    stateCSS,
+	elementTextarea: stateRCDATA,
+	elementTitle:    stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state.
+func tTag(c context, s []byte) (context, []byte) {
+	// Find the attribute name.
+	attrStart := eatWhiteSpace(s, 0)
+	i, err := eatAttrName(s, attrStart)
+	if err != nil {
+		return context{
+			state:  stateError,
+			errStr: err.String(),
+		}, nil
+	}
+	if i == len(s) {
+		return c, nil
+	}
+	state := stateAttr
+	canonAttrName := strings.ToLower(string(s[attrStart:i]))
+	if urlAttr[canonAttrName] {
+		state = stateURL
+	} else if strings.HasPrefix(canonAttrName, "on") {
+		state = stateJS
+	} else if canonAttrName == "style" {
+		state = stateCSS
+	}
+
+	// Look for the start of the value.
+	i = eatWhiteSpace(s, i)
+	if i == len(s) {
+		return c, s[i:]
+	}
+	if s[i] == '>' {
+		state = elementContentType[c.element]
+		return context{state: state, element: c.element}, s[i+1:]
+	} else if s[i] != '=' {
+		// Possible due to a valueless attribute or '/' in "<input />".
+		return c, s[i:]
+	}
+	// Consume the "=".
+	i = eatWhiteSpace(s, i+1)
+
+	// Find the attribute delimiter.
+	delim := delimSpaceOrTagEnd
+	if i < len(s) {
+		switch s[i] {
+		case '\'':
+			delim, i = delimSingleQuote, i+1
+		case '"':
+			delim, i = delimDoubleQuote, i+1
+		}
+	}
+
+	return context{state: state, delim: delim, element: c.element}, s[i:]
+}
+
+// tComment is the context transition function for stateComment.
+func tComment(c context, s []byte) (context, []byte) {
+	i := bytes.Index(s, commentEnd)
+	if i != -1 {
+		return context{}, s[i+3:]
+	}
+	return c, nil
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+var specialTagEndMarkers = [...]string{
+	elementScript:   "</script",
+	elementStyle:    "</style",
+	elementTextarea: "</textarea",
+	elementTitle:    "</title",
+}
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states.
+func tSpecialTagEnd(c context, s []byte) (context, []byte) {
+	if c.element != elementNone {
+		end := specialTagEndMarkers[c.element]
+		i := strings.Index(strings.ToLower(string(s)), end)
+		if i != -1 {
+			return context{state: stateTag}, s[i+len(end):]
+		}
+	}
+	return c, nil
+}
+
+// tAttr is the context transition function for the attribute state.
+func tAttr(c context, s []byte) (context, []byte) {
+	return c, nil
+}
+
+// tURL is the context transition function for the URL state.
+func tURL(c context, s []byte) (context, []byte) {
+	if bytes.IndexAny(s, "#?") >= 0 {
+		c.urlPart = urlPartQueryOrFrag
+	} else if len(s) != 0 && c.urlPart == urlPartNone {
+		c.urlPart = urlPartPreQuery
+	}
+	return c, nil
+}
+
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+
+	i := bytes.IndexAny(s, `"'/`)
+	if i == -1 {
+		// Entire input is non string, comment, regexp tokens.
+		c.jsCtx = nextJSCtx(s, c.jsCtx)
+		return c, nil
+	}
+	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+	switch s[i] {
+	case '"':
+		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+	case '\'':
+		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+	case '/':
+		switch {
+		case i+1 < len(s) && s[i+1] == '/':
+			c.state, i = stateJSLineCmt, i+1
+		case i+1 < len(s) && s[i+1] == '*':
+			c.state, i = stateJSBlockCmt, i+1
+		case c.jsCtx == jsCtxRegexp:
+			c.state = stateJSRegexp
+		case c.jsCtx == jsCtxDivOp:
+			c.jsCtx = jsCtxRegexp
+		default:
+			return context{
+				state:  stateError,
+				errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
+			}, nil
+		}
+	default:
+		panic("unreachable")
+	}
+	return c, s[i+1:]
+}
+
+// tJSStr is the context transition function for the JS string states.
+func tJSStr(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+
+	quoteAndEsc := `\"`
+	if c.state == stateJSSqStr {
+		quoteAndEsc = `\'`
+	}
+
+	b := s
+	for {
+		i := bytes.IndexAny(b, quoteAndEsc)
+		if i == -1 {
+			return c, nil
+		}
+		if b[i] == '\\' {
+			i++
+			if i == len(b) {
+				return context{
+					state:  stateError,
+					errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
+				}, nil
+			}
+		} else {
+			c.state, c.jsCtx = stateJS, jsCtxDivOp
+			return c, b[i+1:]
+		}
+		b = b[i+1:]
+	}
+	panic("unreachable")
+}
+
+// tJSRegexp is the context transition function for the /RegExp/ literal state.
+func tJSRegexp(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+
+	b := s
+	inCharset := false
+	for {
+		i := bytes.IndexAny(b, `/[\]`)
+		if i == -1 {
+			break
+		}
+		switch b[i] {
+		case '/':
+			if !inCharset {
+				c.state, c.jsCtx = stateJS, jsCtxDivOp
+				return c, b[i+1:]
+			}
+		case '\\':
+			i++
+			if i == len(b) {
+				return context{
+					state:  stateError,
+					errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
+				}, nil
+			}
+		case '[':
+			inCharset = true
+		case ']':
+			inCharset = false
+		default:
+			panic("unreachable")
+		}
+		b = b[i+1:]
+	}
+
+	if inCharset {
+		// This can be fixed by making context richer if interpolation
+		// into charsets is desired.
+		return context{
+			state:  stateError,
+			errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
+		}, nil
+	}
+
+	return c, nil
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+	i := bytes.Index(s, blockCommentEnd)
+	if i == -1 {
+		return c, nil
+	}
+	switch c.state {
+	case stateJSBlockCmt:
+		c.state = stateJS
+	case stateCSSBlockCmt:
+		c.state = stateCSS
+	default:
+		panic(c.state.String())
+	}
+	return c, s[i+2:]
+}
+
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+	var lineTerminators string
+	var endState state
+	switch c.state {
+	case stateJSLineCmt:
+		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+	case stateCSSLineCmt:
+		lineTerminators, endState = "\n\f\r", stateCSS
+		// Line comments are not part of any published CSS standard but
+		// are supported by the 4 major browsers.
+		// This defines line comments as
+		//     LINECOMMENT ::= "//" [^\n\f\d]*
+		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+		// newlines:
+		//     nl ::= #xA | #xD #xA | #xD | #xC
+	default:
+		panic(c.state.String())
+	}
+
+	i := bytes.IndexAny(s, lineTerminators)
+	if i == -1 {
+		return c, nil
+	}
+	c.state = endState
+	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+	// "However, the LineTerminator at the end of the line is not
+	// considered to be part of the single-line comment; it is recognised
+	// separately by the lexical grammar and becomes part of the stream of
+	// input elements for the syntactic grammar."
+	return c, s[i:]
+}
+
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+
+	// CSS quoted strings are almost never used except for:
+	// (1) URLs as in background: "/foo.png"
+	// (2) Multiword font-names as in font-family: "Times New Roman"
+	// (3) List separators in content values as in inline-lists:
+	//    <style>
+	//    ul.inlineList { list-style: none; padding:0 }
+	//    ul.inlineList > li { display: inline }
+	//    ul.inlineList > li:before { content: ", " }
+	//    ul.inlineList > li:first-child:before { content: "" }
+	//    </style>
+	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
+	// (4) Attribute value selectors as in a[href="http://example.com/"]
+	//
+	// We conservatively treat all strings as URLs, but make some
+	// allowances to avoid confusion.
+	//
+	// In (1), our conservative assumption is justified.
+	// In (2), valid font names do not contain ':', '?', or '#', so our
+	// conservative assumption is fine since we will never transition past
+	// urlPartPreQuery.
+	// In (3), our protocol heuristic should not be tripped, and there
+	// should not be non-space content after a '?' or '#', so as long as
+	// we only %-encode RFC 3986 reserved characters we are ok.
+	// In (4), we should URL escape for URL attributes, and for others we
+	// have the attribute name available if our conservative assumption
+	// proves problematic for real code.
+
+	for {
+		i := bytes.IndexAny(s, `("'/`)
+		if i == -1 {
+			return c, nil
+		}
+		switch s[i] {
+		case '(':
+			// Look for url to the left.
+			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+			if endsWithCSSKeyword(p, "url") {
+				q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
+				switch {
+				case len(q) != 0 && q[0] == '"':
+					c.state, s = stateCSSDqURL, q[1:]
+				case len(q) != 0 && q[0] == '\'':
+					c.state, s = stateCSSSqURL, q[1:]
+
+				default:
+					c.state, s = stateCSSURL, q
+				}
+				return c, s
+			}
+		case '/':
+			if i+1 < len(s) {
+				switch s[i+1] {
+				case '/':
+					c.state = stateCSSLineCmt
+					return c, s[i+2:]
+				case '*':
+					c.state = stateCSSBlockCmt
+					return c, s[i+2:]
+				}
+			}
+		case '"':
+			c.state = stateCSSDqStr
+			return c, s[i+1:]
+		case '\'':
+			c.state = stateCSSSqStr
+			return c, s[i+1:]
+		}
+		s = s[i+1:]
+	}
+	panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, []byte) {
+	if d, t := tSpecialTagEnd(c, s); t != nil {
+		return d, t
+	}
+
+	var endAndEsc string
+	switch c.state {
+	case stateCSSDqStr, stateCSSDqURL:
+		endAndEsc = `\"`
+	case stateCSSSqStr, stateCSSSqURL:
+		endAndEsc = `\'`
+	case stateCSSURL:
+		// Unquoted URLs end with a newline or close parenthesis.
+		// The below includes the wc (whitespace character) and nl.
+		endAndEsc = "\\\t\n\f\r )"
+	default:
+		panic(c.state.String())
+	}
+
+	b := s
+	for {
+		i := bytes.IndexAny(b, endAndEsc)
+		if i == -1 {
+			return tURL(c, decodeCSS(b))
+		}
+		if b[i] == '\\' {
+			i++
+			if i == len(b) {
+				return context{
+					state:  stateError,
+					errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
+				}, nil
+			}
+		} else {
+			c.state = stateCSS
+			return c, b[i+1:]
+		}
+		c, _ = tURL(c, decodeCSS(b[:i+1]))
+		b = b[i+1:]
+	}
+	panic("unreachable")
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, []byte) {
+	return c, nil
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, os.Error) {
+	for j := i; j < len(s); j++ {
+		switch s[j] {
+		case ' ', '\t', '\n', '\f', '\r', '=', '>':
+			return j, nil
+		case '\'', '"', '<':
+			// These result in a parse warning in HTML5 and are
+			// indicative of serious problems if seen in an attr
+			// name in a template.
+			return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
+		default:
+			// No-op.
+		}
+	}
+	return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+	"script":   elementScript,
+	"style":    elementStyle,
+	"textarea": elementTextarea,
+	"title":    elementTitle,
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+	j := i
+	for ; j < len(s); j++ {
+		x := s[j]
+		if !(('a' <= x && x <= 'z') ||
+			('A' <= x && x <= 'Z') ||
+			('0' <= x && x <= '9' && i != j)) {
+			break
+		}
+	}
+	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+	for j := i; j < len(s); j++ {
+		switch s[j] {
+		case ' ', '\t', '\n', '\f', '\r':
+			// No-op.
+		default:
+			return j
+		}
+	}
+	return len(s)
+}
+
+// urlAttr is the set of attribute names whose values are URLs.
+// It consists of all "%URI"-typed attributes from
+// http://www.w3.org/TR/html4/index/attributes.html
+// as well as those attributes defined at
+// http://dev.w3.org/html5/spec/index.html#attributes-1
+// whose Value column in that table matches
+// "Valid [non-empty] URL potentially surrounded by spaces".
+var urlAttr = map[string]bool{
+	"action":     true,
+	"archive":    true,
+	"background": true,
+	"cite":       true,
+	"classid":    true,
+	"codebase":   true,
+	"data":       true,
+	"formaction": true,
+	"href":       true,
+	"icon":       true,
+	"longdesc":   true,
+	"manifest":   true,
+	"poster":     true,
+	"profile":    true,
+	"src":        true,
+	"usemap":     true,
+}