2
0
mirror of https://github.com/pyrogram/pyrogram synced 2025-08-28 21:07:59 +00:00

Revert "Delete style utils.py and move its content inside html.py. The HTML parser is now the only one that makes use of those util methods"

This reverts commit cd1e41b1
This commit is contained in:
Dan 2019-06-25 05:47:57 +02:00
parent cd1e41b130
commit de02848a69
2 changed files with 40 additions and 20 deletions

View File

@ -20,11 +20,11 @@ import html
import re
from collections import OrderedDict
from html.parser import HTMLParser
from struct import unpack
import pyrogram
from pyrogram.api import types
from pyrogram.errors import PeerIdInvalid
from . import utils
class Parser(HTMLParser):
@ -111,28 +111,11 @@ class Parser(HTMLParser):
class HTML:
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
def __init__(self, client: "pyrogram.BaseClient" = None):
self.client = client
@staticmethod
def add_surrogates(text):
# Replace each SMP code point with a surrogate pair
return HTML.SMP_RE.sub(
lambda match: # Split SMP in two surrogates
"".join(chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))),
text
)
@staticmethod
def remove_surrogates(text):
# Replace each surrogate pair with a SMP code point
return text.encode("utf-16", "surrogatepass").decode("utf-16")
def parse(self, text: str):
text = HTML.add_surrogates(str(text or "").strip())
text = utils.add_surrogates(str(text or "").strip())
parser = Parser(self.client)
parser.feed(text)
@ -140,6 +123,6 @@ class HTML:
# TODO: OrderedDict to be removed in Python 3.6
return OrderedDict([
("message", HTML.remove_surrogates(parser.text)),
("message", utils.remove_surrogates(parser.text)),
("entities", parser.entities)
])

View File

@ -0,0 +1,37 @@
# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import re
from struct import unpack
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
# The pattern matches one code point in the range U+10000 through U+10FFFF,
# i.e. any character that needs two 16-bit units when encoded as UTF-16.
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def _to_surrogate_pair(match):
    """Return the code point matched by *match* as two surrogate characters."""
    # UTF-16LE encodes a code point in this range as exactly two 16-bit units
    high, low = unpack("<HH", match.group().encode("utf-16le"))
    return chr(high) + chr(low)


def add_surrogates(text):
    """Replace every code point matched by ``SMP_RE`` with a surrogate pair.

    Characters outside the U+10000-U+10FFFF range are left untouched, so the
    returned string has one character per UTF-16 code unit of *text*.
    """
    return SMP_RE.sub(_to_surrogate_pair, text)
def remove_surrogates(text):
    """Collapse UTF-16 surrogate pairs in *text* back into single code points.

    The string is round-tripped through UTF-16: the ``surrogatepass`` error
    handler allows surrogate characters to be encoded as raw 16-bit units,
    and the strict decode then reads each high/low pair as one code point.
    Text without surrogates comes back unchanged.
    """
    # Encoding with "utf-16" prepends a BOM, which the matching decode consumes
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")