mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-28 04:48:06 +00:00
Make the HTML parser more sound
This commit is contained in:
parent
07bc7e39df
commit
a086964e85
@ -95,7 +95,16 @@ class Parser(HTMLParser):
|
||||
self.text += data
|
||||
|
||||
def handle_endtag(self, tag):
    """Close the most recently opened entity recorded for ``tag``.

    The last entry stored under ``self.tag_entities[tag]`` is moved to
    ``self.entities``. When that leaves no open entries for the tag,
    the tag's key is removed from ``self.tag_entities``.

    Raises:
        ValueError: if there is no open entry for ``tag``. The message
            carries the position reported by ``self.getpos()``.
    """
    try:
        closed_entity = self.tag_entities[tag].pop()
    except (KeyError, IndexError):
        row, column = self.getpos()
        # The reported column is shifted by one (presumably to present
        # a 1-based column to the user -- confirm against getpos()).
        message = "Unmatched closing tag </{}> at line {}:{}".format(tag, row, column + 1)
        raise ValueError(message)

    self.entities.append(closed_entity)

    # Forget the tag entirely once nothing is left open for it.
    still_open = self.tag_entities[tag]
    if not still_open:
        self.tag_entities.pop(tag)
|
||||
|
||||
def error(self, message):
    """Ignore ``message`` and do nothing.

    NOTE(review): presumably overrides the base parser's error hook so
    that parse errors are not raised from here -- confirm against the
    base class.
    """
    return None
|
||||
@ -112,6 +121,14 @@ class HTML:
|
||||
parser.feed(text)
|
||||
parser.close()
|
||||
|
||||
if parser.tag_entities:
|
||||
unclosed_tags = []
|
||||
|
||||
for tag, entities in parser.tag_entities.items():
|
||||
unclosed_tags.append("<{}> (x{})".format(tag, len(entities)))
|
||||
|
||||
raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags)))
|
||||
|
||||
# TODO: OrderedDict to be removed in Python 3.6
|
||||
return OrderedDict([
|
||||
("message", utils.remove_surrogates(parser.text)),
|
||||
|
Loading…
x
Reference in New Issue
Block a user