>>> import bs4
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<redacted>/lib/python3.5/site-packages/bs4/__init__.py", line 30, in <module>
from .builder import builder_registry, ParserRejectedMarkup
File "/<redacted>/lib/python3.5/site-packages/bs4/builder/__init__.py", line 308, in <module>
from . import _htmlparser
File "<redacted>/lib/python3.5/site-packages/bs4/builder/_htmlparser.py", line 7, in <module>
from html.parser import (
ImportError: cannot import name 'HTMLParseError'
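HTMLParseError was removed from html.parser in Python 3.5, so the import fails. An easy-enough fix is to define an HTMLParseError class locally in _htmlparser.py. Below is the diff patch.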
--- _htmlparser.py 2016-06-09 17:15:35.000000000 -0500
+++ lib/python3.5/site-packages/bs4/builder/_htmlparser.py 2016-06-09 17:16:33.000000000 -0500
@@ -4,13 +4,29 @@
'HTMLParserTreeBuilder',
]
-from html.parser import (
- HTMLParser,
- HTMLParseError,
- )
+from html.parser import HTMLParser
import sys
import warnings
+class HTMLParseError(Exception):
+ """Exception raised for all parse errors."""
+
+ def __init__(self, msg, position=(None, None)):
+ assert msg
+ self.msg = msg
+ self.lineno = position[0]
+ self.offset = position[1]
+
+ def __str__(self):
+ result = self.msg
+ if self.lineno is not None:
+ result = result + ", at line %d" % self.lineno
+ if self.offset is not None:
+ result = result + ", column %d" % (self.offset + 1)
+ return result
+
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument, which we'd like to set to False. Unfortunately,
# http://bugs.python.org/issue13273 makes strict=True a better bet