Here is an update to that patch: it also adds positions to
NavigableString instances. As far as I know, positions are now
recorded everywhere they can go:
1015c1015
< self.soup.extractCharsetFromMeta(attrs)
---
> self.soup.extractCharsetFromMeta(attrs, position=self.getpos())
1017c1017
< self.soup.unknown_starttag(name, attrs)
---
> self.soup.unknown_starttag(name, attrs, position=self.getpos())
1020c1020
< self.soup.unknown_endtag(name)
---
> self.soup.unknown_endtag(name, position=self.getpos())
1023c1023
< self.soup.handle_data(content)
---
> self.soup.handle_data(content, position=self.getpos())
1030c1030
< self.soup.endData(subclass)
---
> self.soup.endData(subclass, position=self.getpos())
1279a1283
> self.currentDataPosition = None
1306c1310
< def endData(self, containerClass=NavigableString):
---
> def endData(self, containerClass=NavigableString, position=None):
1308a1313
> currentDataPosition = self.currentDataPosition
1316a1322
> self.currentDataPosition = None
1322a1329
> o.position = currentDataPosition
1397,1398c1404,1405
< def unknown_starttag(self, name, attrs, selfClosing=0):
< #print "Start tag %s: %s" % (name, attrs)
---
> def unknown_starttag(self, name, attrs, position, selfClosing=0):
> #print "Start tag %s: %s" % (name, attrs), position
1403c1410
< self.handle_data('<%s%s>' % (name, attrs))
---
> self.handle_data('<%s%s>' % (name, attrs), position)
1414a1422
> tag.position = position
1427c1435
< def unknown_endtag(self, name):
---
> def unknown_endtag(self, name, position):
1432c1440
< self.handle_data('</%s>' % name)
---
> self.handle_data('</%s>' % name, position)
1440c1448,1450
< def handle_data(self, data):
---
> def handle_data(self, data, position):
> if not self.currentDataPosition:
> self.currentDataPosition = position
1443,1444c1453,1454
< def extractCharsetFromMeta(self, attrs):
< self.unknown_starttag('meta', attrs)
---
> def extractCharsetFromMeta(self, attrs, position):
> self.unknown_starttag('meta', attrs, position)
1553c1563
< def extractCharsetFromMeta(self, attrs):
---
> def extractCharsetFromMeta(self, attrs, position):
1596c1606
< tag = self.unknown_starttag("meta", attrs)
---
> tag = self.unknown_starttag("meta", attrs, position)