mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-28 12:57:52 +00:00
Delete style utils.py and move its content inside html.py
The HTML parser is now the only one that makes use of those util methods
This commit is contained in:
parent
e7457de947
commit
cd1e41b130
@ -20,11 +20,11 @@ import html
|
|||||||
import re
|
import re
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
from struct import unpack
|
||||||
|
|
||||||
import pyrogram
|
import pyrogram
|
||||||
from pyrogram.api import types
|
from pyrogram.api import types
|
||||||
from pyrogram.errors import PeerIdInvalid
|
from pyrogram.errors import PeerIdInvalid
|
||||||
from . import utils
|
|
||||||
|
|
||||||
|
|
||||||
class Parser(HTMLParser):
|
class Parser(HTMLParser):
|
||||||
@ -111,11 +111,28 @@ class Parser(HTMLParser):
|
|||||||
|
|
||||||
|
|
||||||
class HTML:
|
class HTML:
|
||||||
|
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
|
||||||
|
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
|
||||||
|
|
||||||
def __init__(self, client: "pyrogram.BaseClient" = None):
|
def __init__(self, client: "pyrogram.BaseClient" = None):
|
||||||
self.client = client
|
self.client = client
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def add_surrogates(text):
|
||||||
|
# Replace each SMP code point with a surrogate pair
|
||||||
|
return HTML.SMP_RE.sub(
|
||||||
|
lambda match: # Split SMP in two surrogates
|
||||||
|
"".join(chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))),
|
||||||
|
text
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def remove_surrogates(text):
|
||||||
|
# Replace each surrogate pair with a SMP code point
|
||||||
|
return text.encode("utf-16", "surrogatepass").decode("utf-16")
|
||||||
|
|
||||||
def parse(self, text: str):
|
def parse(self, text: str):
|
||||||
text = utils.add_surrogates(str(text or "").strip())
|
text = HTML.add_surrogates(str(text or "").strip())
|
||||||
|
|
||||||
parser = Parser(self.client)
|
parser = Parser(self.client)
|
||||||
parser.feed(text)
|
parser.feed(text)
|
||||||
@ -123,6 +140,6 @@ class HTML:
|
|||||||
|
|
||||||
# TODO: OrderedDict to be removed in Python 3.6
|
# TODO: OrderedDict to be removed in Python 3.6
|
||||||
return OrderedDict([
|
return OrderedDict([
|
||||||
("message", utils.remove_surrogates(parser.text)),
|
("message", HTML.remove_surrogates(parser.text)),
|
||||||
("entities", parser.entities)
|
("entities", parser.entities)
|
||||||
])
|
])
|
||||||
|
@ -1,37 +0,0 @@
|
|||||||
# Pyrogram - Telegram MTProto API Client Library for Python
|
|
||||||
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
|
|
||||||
#
|
|
||||||
# This file is part of Pyrogram.
|
|
||||||
#
|
|
||||||
# Pyrogram is free software: you can redistribute it and/or modify
|
|
||||||
# it under the terms of the GNU Lesser General Public License as published
|
|
||||||
# by the Free Software Foundation, either version 3 of the License, or
|
|
||||||
# (at your option) any later version.
|
|
||||||
#
|
|
||||||
# Pyrogram is distributed in the hope that it will be useful,
|
|
||||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
# GNU Lesser General Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU Lesser General Public License
|
|
||||||
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
|
|
||||||
import re
|
|
||||||
from struct import unpack
|
|
||||||
|
|
||||||
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
|
|
||||||
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
|
|
||||||
|
|
||||||
|
|
||||||
def add_surrogates(text):
|
|
||||||
# Replace each SMP code point with a surrogate pair
|
|
||||||
return SMP_RE.sub(
|
|
||||||
lambda match: # Split SMP in two surrogates
|
|
||||||
"".join(chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))),
|
|
||||||
text
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def remove_surrogates(text):
|
|
||||||
# Replace each surrogate pair with a SMP code point
|
|
||||||
return text.encode("utf-16", "surrogatepass").decode("utf-16")
|
|
Loading…
x
Reference in New Issue
Block a user