Is the problem that parsing the <table><tr> is failing, or is it a
question of how to walk a correctly-parsed tree of DOM Nodes to
extract the rows?
package main
import (
"exp/html"
"fmt"
"log"
"os"
"strings"
)
// data is the sample HTML document parsed by main: a body containing loose
// text before and after a single two-row, two-column table, where one cell
// holds nested inline markup (<b>).
const data = `<html><body>
foo bar
<table>
<tr><td>a1</td><td>b1 is <b>bold</b></td></tr>
<tr><td>a2</td><td>b2</td></tr>
</table>
baz
</body></html>`
// Traversal states passed down by walk. They are declared in nesting order so
// that walk can advance from one state to the next with state++.
const (
// nothing: no enclosing <table> has been seen yet.
nothing = iota
// inTable: inside a <table>, waiting for a <tr>.
inTable
// inTR: inside a <tr>, waiting for a <td>.
inTR
// inTD: inside a <td>; walk renders every node it visits in this state.
inTD
)
func main() {
n, err := html.Parse(strings.NewReader(data))
if err != nil {
log.Fatal(err)
}
walk(n, nothing)
}
// walk visits the tree rooted at n depth-first. It prints a banner each time
// it enters a table or a row, and renders the contents of every cell to
// standard output, followed by a newline once the cell is finished.
//
// state is one of nothing, inTable, inTR or inTD and describes which table
// elements enclose n. It is passed by value, so any transition made here only
// affects n's descendants.
func walk(n *html.Node, state int) {
	// Every node visited below a <td> is cell content: emit it as markup and
	// do not descend further from here.
	if state == inTD {
		html.Render(os.Stdout, n)
		return
	}

	enteredCell := false
	if n.Type == html.ElementNode {
		// Each tag only counts when it appears at the expected nesting
		// level; any other element leaves the state unchanged.
		switch n.Data {
		case "table":
			if state == nothing {
				state = inTable
				fmt.Println("---- table ----")
			}
		case "tr":
			if state == inTable {
				state = inTR
				fmt.Println("-- row --")
			}
		case "td":
			if state == inTR {
				state = inTD
				enteredCell = true
			}
		}
	}

	for i := range n.Child {
		walk(n.Child[i], state)
	}

	// Terminate the line after all of the cell's children have been rendered.
	if enteredCell {
		fmt.Println()
	}
}
Output:
$ go run main.go
---- table ----
-- row --
a1
b1 is <b>bold</b>
-- row --
a2
b2
Thanks for the report. I'll see about tracking it down.
Would you mind verifying that you no longer see the problem?