mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-28 04:48:06 +00:00
Make the HTML parser more sound
This commit is contained in:
parent
07bc7e39df
commit
a086964e85
@ -95,7 +95,16 @@ class Parser(HTMLParser):
|
|||||||
self.text += data
|
self.text += data
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
self.entities.append(self.tag_entities[tag].pop())
|
try:
|
||||||
|
self.entities.append(self.tag_entities[tag].pop())
|
||||||
|
except (KeyError, IndexError):
|
||||||
|
line, offset = self.getpos()
|
||||||
|
offset += 1
|
||||||
|
|
||||||
|
raise ValueError("Unmatched closing tag </{}> at line {}:{}".format(tag, line, offset))
|
||||||
|
else:
|
||||||
|
if not self.tag_entities[tag]:
|
||||||
|
self.tag_entities.pop(tag)
|
||||||
|
|
||||||
def error(self, message):
|
def error(self, message):
|
||||||
pass
|
pass
|
||||||
@ -112,6 +121,14 @@ class HTML:
|
|||||||
parser.feed(text)
|
parser.feed(text)
|
||||||
parser.close()
|
parser.close()
|
||||||
|
|
||||||
|
if parser.tag_entities:
|
||||||
|
unclosed_tags = []
|
||||||
|
|
||||||
|
for tag, entities in parser.tag_entities.items():
|
||||||
|
unclosed_tags.append("<{}> (x{})".format(tag, len(entities)))
|
||||||
|
|
||||||
|
raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags)))
|
||||||
|
|
||||||
# TODO: OrderedDict to be removed in Python 3.6
|
# TODO: OrderedDict to be removed in Python 3.6
|
||||||
return OrderedDict([
|
return OrderedDict([
|
||||||
("message", utils.remove_surrogates(parser.text)),
|
("message", utils.remove_surrogates(parser.text)),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user