Commit 2aa589c8 authored by Andrew Balholm's avatar Andrew Balholm Committed by Nigel Tao

html: implement foster parenting

Implement the foster-parenting algorithm for content that is inside a table
but not in a cell.

Also fix a bug in reconstructing the active formatting elements.

Pass test 30 in tests1.dat:
<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y

R=nigeltao
CC=golang-dev
https://golang.org/cl/5309052
parent 2f352ae4
...@@ -32,6 +32,9 @@ type parser struct { ...@@ -32,6 +32,9 @@ type parser struct {
// originalIM is the insertion mode to go back to after completing a text // originalIM is the insertion mode to go back to after completing a text
// or inTableText insertion mode. // or inTableText insertion mode.
originalIM insertionMode originalIM insertionMode
// fosterParenting is whether new elements should be inserted according to
// the foster parenting rules (section 11.2.5.3).
fosterParenting bool
} }
func (p *parser) top() *Node { func (p *parser) top() *Node {
...@@ -103,12 +106,56 @@ func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool { ...@@ -103,12 +106,56 @@ func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
// addChild adds a child node n to the top element, and pushes n onto the stack // addChild adds a child node n to the top element, and pushes n onto the stack
// of open elements if it is an element node. // of open elements if it is an element node.
func (p *parser) addChild(n *Node) { func (p *parser) addChild(n *Node) {
if p.fosterParenting {
p.fosterParent(n)
} else {
p.top().Add(n) p.top().Add(n)
}
if n.Type == ElementNode { if n.Type == ElementNode {
p.oe = append(p.oe, n) p.oe = append(p.oe, n)
} }
} }
// fosterParent adds a child node according to the foster parenting rules.
// Section 11.2.5.3, "foster parenting".
func (p *parser) fosterParent(n *Node) {
var table, parent *Node
var i int
for i = len(p.oe) - 1; i >= 0; i-- {
if p.oe[i].Data == "table" {
table = p.oe[i]
break
}
}
if table == nil {
// The foster parent is the html element.
parent = p.oe[0]
} else {
parent = table.Parent
}
if parent == nil {
parent = p.oe[i-1]
}
var child *Node
for i, child = range parent.Child {
if child == table {
break
}
}
if i == len(parent.Child) {
parent.Add(n)
} else {
// Insert n into parent.Child at index i.
parent.Child = append(parent.Child[:i+1], parent.Child[i:]...)
parent.Child[i] = n
n.Parent = parent
}
}
// addText adds text to the preceding node if it is a text node, or else it // addText adds text to the preceding node if it is a text node, or else it
// calls addChild with a new text node. // calls addChild with a new text node.
func (p *parser) addText(text string) { func (p *parser) addText(text string) {
...@@ -170,9 +217,9 @@ func (p *parser) reconstructActiveFormattingElements() { ...@@ -170,9 +217,9 @@ func (p *parser) reconstructActiveFormattingElements() {
} }
for { for {
i++ i++
n = p.afe[i] clone := p.afe[i].clone()
p.addChild(n.clone()) p.addChild(clone)
p.afe[i] = n p.afe[i] = clone
if i == len(p.afe)-1 { if i == len(p.afe)-1 {
break break
} }
...@@ -536,10 +583,7 @@ func inBodyIM(p *parser) (insertionMode, bool) { ...@@ -536,10 +583,7 @@ func inBodyIM(p *parser) (insertionMode, bool) {
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u": case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data) p.inBodyEndTagFormatting(p.tok.Data)
default: default:
// TODO: any other end tag p.inBodyEndTagOther(p.tok.Data)
if p.tok.Data == p.top().Data {
p.oe.pop()
}
} }
case CommentToken: case CommentToken:
p.addChild(&Node{ p.addChild(&Node{
...@@ -573,6 +617,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) { ...@@ -573,6 +617,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
} }
} }
if formattingElement == nil { if formattingElement == nil {
p.inBodyEndTagOther(tag)
return return
} }
feIndex := p.oe.index(formattingElement) feIndex := p.oe.index(formattingElement)
...@@ -645,8 +690,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) { ...@@ -645,8 +690,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
} }
switch commonAncestor.Data { switch commonAncestor.Data {
case "table", "tbody", "tfoot", "thead", "tr": case "table", "tbody", "tfoot", "thead", "tr":
// TODO: fix up misnested table nodes; find the foster parent. p.fosterParent(lastNode)
fallthrough
default: default:
commonAncestor.Add(lastNode) commonAncestor.Add(lastNode)
} }
...@@ -667,6 +711,19 @@ func (p *parser) inBodyEndTagFormatting(tag string) { ...@@ -667,6 +711,19 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
} }
} }
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
func (p *parser) inBodyEndTagOther(tag string) {
for i := len(p.oe) - 1; i >= 0; i-- {
if p.oe[i].Data == tag {
p.oe = p.oe[:i]
break
}
if isSpecialElement[p.oe[i].Data] {
break
}
}
}
// Section 11.2.5.4.8. // Section 11.2.5.4.8.
func textIM(p *parser) (insertionMode, bool) { func textIM(p *parser) (insertionMode, bool) {
switch p.tok.Type { switch p.tok.Type {
...@@ -683,12 +740,6 @@ func textIM(p *parser) (insertionMode, bool) { ...@@ -683,12 +740,6 @@ func textIM(p *parser) (insertionMode, bool) {
// Section 11.2.5.4.9. // Section 11.2.5.4.9.
func inTableIM(p *parser) (insertionMode, bool) { func inTableIM(p *parser) (insertionMode, bool) {
var (
add bool
data string
attr []Attribute
consumed bool
)
switch p.tok.Type { switch p.tok.Type {
case ErrorToken: case ErrorToken:
// Stop parsing. // Stop parsing.
...@@ -698,13 +749,19 @@ func inTableIM(p *parser) (insertionMode, bool) { ...@@ -698,13 +749,19 @@ func inTableIM(p *parser) (insertionMode, bool) {
case StartTagToken: case StartTagToken:
switch p.tok.Data { switch p.tok.Data {
case "tbody", "tfoot", "thead": case "tbody", "tfoot", "thead":
add = true p.clearStackToTableContext()
data = p.tok.Data p.addElement(p.tok.Data, p.tok.Attr)
attr = p.tok.Attr return inTableBodyIM, true
consumed = true
case "td", "th", "tr": case "td", "th", "tr":
add = true p.clearStackToTableContext()
data = "tbody" p.addElement("tbody", nil)
return inTableBodyIM, false
case "table":
if p.popUntil(tableScopeStopTags, "table") {
return p.resetInsertionMode(), false
}
// Ignore the token.
return inTableIM, true
default: default:
// TODO. // TODO.
} }
...@@ -727,13 +784,23 @@ func inTableIM(p *parser) (insertionMode, bool) { ...@@ -727,13 +784,23 @@ func inTableIM(p *parser) (insertionMode, bool) {
}) })
return inTableIM, true return inTableIM, true
} }
if add {
// TODO: clear the stack back to a table context. switch p.top().Data {
p.addElement(data, attr) case "table", "tbody", "tfoot", "thead", "tr":
return inTableBodyIM, consumed p.fosterParenting = true
defer func() { p.fosterParenting = false }()
}
return useTheRulesFor(p, inTableIM, inBodyIM)
}
func (p *parser) clearStackToTableContext() {
for i := len(p.oe) - 1; i >= 0; i-- {
if x := p.oe[i].Data; x == "table" || x == "html" {
p.oe = p.oe[:i+1]
return
}
} }
// TODO: return useTheRulesFor(inTableIM, inBodyIM, p) unless etc. etc. foster parenting.
return inTableIM, true
} }
// Section 11.2.5.4.13. // Section 11.2.5.4.13.
......
...@@ -123,7 +123,7 @@ func TestParser(t *testing.T) { ...@@ -123,7 +123,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader) rc := make(chan io.Reader)
go readDat(filename, rc) go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset. // TODO(nigeltao): Process all test cases, not just a subset.
for i := 0; i < 30; i++ { for i := 0; i < 31; i++ {
// Parse the #data section. // Parse the #data section.
b, err := ioutil.ReadAll(<-rc) b, err := ioutil.ReadAll(<-rc)
if err != nil { if err != nil {
...@@ -152,6 +152,13 @@ func TestParser(t *testing.T) { ...@@ -152,6 +152,13 @@ func TestParser(t *testing.T) {
continue continue
} }
// Check that rendering and re-parsing results in an identical tree. // Check that rendering and re-parsing results in an identical tree.
if filename == "tests1.dat" && i == 30 {
// Test 30 in tests1.dat is such messed-up markup that a correct parse
// results in a non-conforming tree (one <a> element nested inside another).
// Therefore when it is rendered and re-parsed, it isn't the same.
// So we skip rendering on that test.
continue
}
pr, pw := io.Pipe() pr, pw := io.Pipe()
go func() { go func() {
pw.CloseWithError(Render(pw, doc)) pw.CloseWithError(Render(pw, doc))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment