python3.5 HTMLParseError problems

624 views
Skip to first unread message

Adam English

unread,
Jun 9, 2016, 6:33:26 PM (6/9/16)
to beautifulsoup
Python 3.5 officially removed html.parser.HTMLParseError.
It is still imported by bs4/builder/_htmlparser.py, so importing bs4 fails:

>>> import bs4

Traceback (most recent call last):

  File "<stdin>", line 1, in <module>

  File "<redacted>/lib/python3.5/site-packages/bs4/__init__.py", line 30, in <module>

    from .builder import builder_registry, ParserRejectedMarkup

  File "/<redacted>/lib/python3.5/site-packages/bs4/builder/__init__.py", line 308, in <module>

    from . import _htmlparser

  File "<redacted>/lib/python3.5/site-packages/bs4/builder/_htmlparser.py", line 7, in <module>

    from html.parser import (

ImportError: cannot import name 'HTMLParseError'


An easy-enough fix is to define an HTMLParseError class locally instead of importing it. Below is the diff patch.


--- _htmlparser.py 2016-06-09 17:15:35.000000000 -0500

+++ lib/python3.5/site-packages/bs4/builder/_htmlparser.py 2016-06-09 17:16:33.000000000 -0500

@@ -4,13 +4,29 @@

     'HTMLParserTreeBuilder',

     ]

 

-from html.parser import (

-    HTMLParser,

-    HTMLParseError,

-    )

+from html.parser import HTMLParser

 import sys

 import warnings

 

+class HTMLParseError(Exception):

+    """Exception raised for all parse errors."""

+

+    def __init__(self, msg, position=(None, None)):

+        assert msg

+        self.msg = msg

+        self.lineno = position[0]

+        self.offset = position[1]

+

+    def __str__(self):

+        result = self.msg

+        if self.lineno is not None:

+            result = result + ", at line %d" % self.lineno

+        if self.offset is not None:

+            result = result + ", column %d" % (self.offset + 1)

+        return result

+

 # Starting in Python 3.2, the HTMLParser constructor takes a 'strict'

 # argument, which we'd like to set to False. Unfortunately,

 # http://bugs.python.org/issue13273 makes strict=True a better bet

Reply all
Reply to author
Forward
0 new messages