mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-29 05:18:10 +00:00
Update the HTML logic to output well-formed elements (#1155)
* unparsing html entities with deque
* unparsing using a stack (recursive)
This commit is contained in:
parent
ad773455a7
commit
2ed000381d
@ -155,11 +155,10 @@ class HTML:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def unparse(text: str, entities: list):
|
def unparse(text: str, entities: list):
|
||||||
text = utils.add_surrogates(text)
|
def parse_one(entity):
|
||||||
|
"""
|
||||||
entities_offsets = []
|
Parses a single entity and returns (start_tag, start), (end_tag, end)
|
||||||
|
"""
|
||||||
for entity in entities:
|
|
||||||
entity_type = entity.type
|
entity_type = entity.type
|
||||||
start = entity.offset
|
start = entity.offset
|
||||||
end = start + entity.length
|
end = start + entity.length
|
||||||
@ -199,21 +198,43 @@ class HTML:
|
|||||||
start_tag = f'<emoji id="{custom_emoji_id}">'
|
start_tag = f'<emoji id="{custom_emoji_id}">'
|
||||||
end_tag = "</emoji>"
|
end_tag = "</emoji>"
|
||||||
else:
|
else:
|
||||||
continue
|
return
|
||||||
|
|
||||||
entities_offsets.append((start_tag, start,))
|
return (start_tag, start), (end_tag, end)
|
||||||
entities_offsets.append((end_tag, end,))
|
|
||||||
|
|
||||||
entities_offsets = map(
|
def recursive(entity_i: int) -> int:
|
||||||
lambda x: x[1],
|
"""
|
||||||
sorted(
|
Takes the index of the entity to start parsing from, returns the number of parsed entities inside it.
|
||||||
enumerate(entities_offsets),
|
Uses entities_offsets as a stack, pushing (start_tag, start) first, then parsing nested entities,
|
||||||
key=lambda x: (x[1][1], x[0]),
|
and finally pushing (end_tag, end) to the stack.
|
||||||
reverse=True
|
No need to sort at the end.
|
||||||
)
|
"""
|
||||||
)
|
this = parse_one(entities[entity_i])
|
||||||
|
if this is None:
|
||||||
|
return 1
|
||||||
|
(start_tag, start), (end_tag, end) = this
|
||||||
|
entities_offsets.append((start_tag, start))
|
||||||
|
internal_i = entity_i + 1
|
||||||
|
# while the next entity is inside the current one, keep parsing
|
||||||
|
while internal_i < len(entities) and entities[internal_i].offset < end:
|
||||||
|
internal_i += recursive(internal_i)
|
||||||
|
entities_offsets.append((end_tag, end))
|
||||||
|
return internal_i - entity_i
|
||||||
|
|
||||||
for entity, offset in entities_offsets:
|
text = utils.add_surrogates(text)
|
||||||
|
|
||||||
|
entities_offsets = []
|
||||||
|
|
||||||
|
# probably useless because entities are already sorted by telegram
|
||||||
|
entities.sort(key=lambda e: (e.offset, -e.length))
|
||||||
|
|
||||||
|
# main loop for first-level entities
|
||||||
|
i = 0
|
||||||
|
while i < len(entities):
|
||||||
|
i += recursive(i)
|
||||||
|
|
||||||
|
# no need to sort, but still add entities starting from the end
|
||||||
|
for entity, offset in reversed(entities_offsets):
|
||||||
text = text[:offset] + entity + text[offset:]
|
text = text[:offset] + entity + text[offset:]
|
||||||
|
|
||||||
return utils.remove_surrogates(text)
|
return utils.remove_surrogates(text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user