diff --git a/html/token.go b/html/token.go
index 3c57880..6598c1f 100644
--- a/html/token.go
+++ b/html/token.go
@@ -839,8 +839,22 @@
if raw {
z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
}
- // Look for a self-closing token like "<br/>".
- if z.err == nil && z.buf[z.raw.end-2] == '/' {
+ // Look for a self-closing token (e.g. <br/>).
+ //
+ // Originally, we did this by just checking that the last character of the
+ // tag (ignoring the closing bracket) was a solidus (/) character, but this
+ // is not always accurate.
+ //
+ // We need to be careful that we don't misinterpret a non-self-closing tag
+ // as self-closing, as can happen if the tag contains unquoted attribute
+ // values (e.g. <p a=/>).
+ //
+ // To avoid this, we check that the position of the last non-bracket
+ // character of the tag (z.raw.end-2) isn't the same as the position of the
+ // last non-quote character of the last attribute of the tag
+ // (z.attr[nAttrs-1][1].end-1), if the tag has attributes.
+ nAttrs := len(z.attr)
+ if z.err == nil && z.buf[z.raw.end-2] == '/' && (nAttrs == 0 || z.raw.end-2 != z.attr[nAttrs-1][1].end-1) {
return SelfClosingTagToken
}
return StartTagToken
diff --git a/html/token_test.go b/html/token_test.go
index a36d112..44773f1 100644
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -616,6 +616,16 @@
`<p a/ ="">`,
`<p a="" =""="">`,
},
+ {
+ "slash at end of unquoted attribute value",
+ `<p a="\">`,
+ `<p a="\">`,
+ },
+ {
+ "self-closing tag with attribute",
+ `<p a=/>`,
+ `<p a="/">`,
+ },
}
func TestTokenizer(t *testing.T) {
@@ -815,6 +825,14 @@
}
}
+func TestSelfClosingTagValueConfusion(t *testing.T) {
+	// An unquoted value ending in '/' must not turn the tag into a self-closing one.
+	tokenizer := NewTokenizer(strings.NewReader(`<p a=/>`))
+	if got, want := tokenizer.Next(), StartTagToken; got != want {
+		t.Fatalf("unexpected token type: got %s, want %s", got, want)
+	}
+}
+
// zeroOneByteReader is like a strings.Reader that alternates between
// returning 0 bytes and 1 byte at a time.
type zeroOneByteReader struct {