2
0
mirror of https://github.com/pyrogram/pyrogram synced 2025-08-29 05:18:10 +00:00

Update the HTML logic to output well-formed elements (#1155)

* unparsing HTML entities with a deque

* unparsing using a recursive stack-based approach
This commit is contained in:
Andrea Princic 2022-12-06 18:29:27 +01:00 committed by GitHub
parent ad773455a7
commit 2ed000381d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -155,11 +155,10 @@ class HTML:
@staticmethod @staticmethod
def unparse(text: str, entities: list): def unparse(text: str, entities: list):
text = utils.add_surrogates(text) def parse_one(entity):
"""
entities_offsets = [] Parses a single entity and returns (start_tag, start), (end_tag, end)
"""
for entity in entities:
entity_type = entity.type entity_type = entity.type
start = entity.offset start = entity.offset
end = start + entity.length end = start + entity.length
@ -199,21 +198,43 @@ class HTML:
start_tag = f'<emoji id="{custom_emoji_id}">' start_tag = f'<emoji id="{custom_emoji_id}">'
end_tag = "</emoji>" end_tag = "</emoji>"
else: else:
continue return
entities_offsets.append((start_tag, start,)) return (start_tag, start), (end_tag, end)
entities_offsets.append((end_tag, end,))
entities_offsets = map( def recursive(entity_i: int) -> int:
lambda x: x[1], """
sorted( Takes the index of the entity to start parsing from, returns the number of parsed entities inside it.
enumerate(entities_offsets), Uses entities_offsets as a stack, pushing (start_tag, start) first, then parsing nested entities,
key=lambda x: (x[1][1], x[0]), and finally pushing (end_tag, end) to the stack.
reverse=True No need to sort at the end.
) """
) this = parse_one(entities[entity_i])
if this is None:
return 1
(start_tag, start), (end_tag, end) = this
entities_offsets.append((start_tag, start))
internal_i = entity_i + 1
# while the next entity is inside the current one, keep parsing
while internal_i < len(entities) and entities[internal_i].offset < end:
internal_i += recursive(internal_i)
entities_offsets.append((end_tag, end))
return internal_i - entity_i
for entity, offset in entities_offsets: text = utils.add_surrogates(text)
entities_offsets = []
# probably useless because entities are already sorted by telegram
entities.sort(key=lambda e: (e.offset, -e.length))
# main loop for first-level entities
i = 0
while i < len(entities):
i += recursive(i)
# no need to sort, but still add entities starting from the end
for entity, offset in reversed(entities_offsets):
text = text[:offset] + entity + text[offset:] text = text[:offset] + entity + text[offset:]
return utils.remove_surrogates(text) return utils.remove_surrogates(text)