Commit 1d0c141d authored by Nigel Tao

html: parse doctype tokens; merge adjacent text nodes.

The test case input is "<!DOCTYPE html><span><button>foo</span>bar".
The correct parse is:
| <!DOCTYPE html>
| <html>
|   <head>
|   <body>
|     <span>
|       <button>
|         "foobar"

R=gri
CC=golang-dev
https://golang.org/cl/4794063
parent 4d2766e9
......@@ -13,6 +13,7 @@ const (
DocumentNode
ElementNode
CommentNode
DoctypeNode
scopeMarkerNode
)
......
......@@ -81,8 +81,8 @@ func (p *parser) popUntil(stopTags []string, matchTags ...string) bool {
return false
}
// addChild adds a child node n to the top element, and pushes n if it is an
// element node (text nodes are not part of the stack of open elements).
// addChild adds a child node n to the top element, and pushes n onto the stack
// of open elements if it is an element node.
func (p *parser) addChild(n *Node) {
p.top().Add(n)
if n.Type == ElementNode {
......@@ -90,10 +90,15 @@ func (p *parser) addChild(n *Node) {
}
}
// addText calls addChild with a text node.
// addText adds text to the preceding node if it is a text node, or else it
// calls addChild with a new text node.
func (p *parser) addText(text string) {
// TODO: merge s with previous text, if the preceding node is a text node.
// TODO: distinguish whitespace text from others.
t := p.top()
if i := len(t.Child); i > 0 && t.Child[i-1].Type == TextNode {
t.Child[i-1].Data += text
return
}
p.addChild(&Node{
Type: TextNode,
Data: text,
......@@ -201,7 +206,15 @@ func useTheRulesFor(p *parser, actual, delegate insertionMode) (insertionMode, b
// Section 11.2.5.4.1.
// initialIM is the handler for the initial insertion mode. If the current
// token is a DOCTYPE token it is recorded in the tree as a DoctypeNode; in
// every case the next insertion mode is beforeHTMLIM.
// NOTE(review): the bool result presumably reports whether the current token
// was consumed (true) or still has to be processed by the next mode (false) --
// confirm against the insertionMode contract.
func initialIM(p *parser) (insertionMode, bool) {
// A DOCTYPE token becomes a DoctypeNode child of the current top node,
// carrying the token's data unchanged.
if p.tok.Type == DoctypeToken {
p.addChild(&Node{
Type: DoctypeNode,
Data: p.tok.Data,
})
return beforeHTMLIM, true
}
// TODO: set "quirks mode"? It's defined in the DOM spec instead of HTML5 proper,
// and so switching on "quirks mode" might belong in a different package.
// Any other token: switch to beforeHTMLIM and report false.
return beforeHTMLIM, false
}
......
......@@ -85,6 +85,8 @@ func dumpLevel(w io.Writer, n *Node, level int) os.Error {
fmt.Fprintf(w, "%q", EscapeString(n.Data))
case CommentNode:
return os.NewError("COMMENT")
case DoctypeNode:
fmt.Fprintf(w, "<!DOCTYPE %s>", EscapeString(n.Data))
case scopeMarkerNode:
return os.NewError("unexpected scopeMarkerNode")
default:
......@@ -121,7 +123,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
for i := 0; i < 23; i++ {
for i := 0; i < 25; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment