2
0
mirror of https://github.com/pyrogram/pyrogram synced 2025-08-28 12:57:52 +00:00

Delete style utils.py and move its content inside html.py

The HTML parser is now the only one that makes use of those util methods
This commit is contained in:
Dan 2019-06-24 14:33:17 +02:00
parent e7457de947
commit cd1e41b130
2 changed files with 20 additions and 40 deletions

View File

@ -20,11 +20,11 @@ import html
import re import re
from collections import OrderedDict from collections import OrderedDict
from html.parser import HTMLParser from html.parser import HTMLParser
from struct import unpack
import pyrogram import pyrogram
from pyrogram.api import types from pyrogram.api import types
from pyrogram.errors import PeerIdInvalid from pyrogram.errors import PeerIdInvalid
from . import utils
class Parser(HTMLParser): class Parser(HTMLParser):
@ -111,11 +111,28 @@ class Parser(HTMLParser):
class HTML: class HTML:
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
def __init__(self, client: "pyrogram.BaseClient" = None): def __init__(self, client: "pyrogram.BaseClient" = None):
self.client = client self.client = client
@staticmethod
def add_surrogates(text):
    """Return *text* with every SMP code point expanded to a surrogate pair.

    Characters outside the range matched by ``HTML.SMP_RE`` are left as-is.
    """

    def _to_surrogate_pair(match):
        # A matched code point encodes to exactly two little-endian
        # 16-bit units in UTF-16: the high and the low surrogate.
        high, low = unpack("<HH", match.group().encode("utf-16le"))
        return chr(high) + chr(low)

    return HTML.SMP_RE.sub(_to_surrogate_pair, text)
@staticmethod
def remove_surrogates(text):
# Replace each surrogate pair with a SMP code point
return text.encode("utf-16", "surrogatepass").decode("utf-16")
def parse(self, text: str): def parse(self, text: str):
text = utils.add_surrogates(str(text or "").strip()) text = HTML.add_surrogates(str(text or "").strip())
parser = Parser(self.client) parser = Parser(self.client)
parser.feed(text) parser.feed(text)
@ -123,6 +140,6 @@ class HTML:
# TODO: OrderedDict to be removed in Python 3.6 # TODO: OrderedDict to be removed in Python 3.6
return OrderedDict([ return OrderedDict([
("message", utils.remove_surrogates(parser.text)), ("message", HTML.remove_surrogates(parser.text)),
("entities", parser.entities) ("entities", parser.entities)
]) ])

View File

@ -1,37 +0,0 @@
# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import re
from struct import unpack
# Matches a single code point above the Basic Multilingual Plane
# (U+10000 through U+10FFFF).
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def add_surrogates(text):
    """Return *text* with every SMP code point expanded to a surrogate pair.

    Characters not matched by ``SMP_RE`` are left as-is.
    """

    def _to_surrogate_pair(match):
        # A matched code point encodes to exactly two little-endian
        # 16-bit units in UTF-16: the high and the low surrogate.
        high, low = unpack("<HH", match.group().encode("utf-16le"))
        return chr(high) + chr(low)

    return SMP_RE.sub(_to_surrogate_pair, text)
def remove_surrogates(text):
    """Return *text* with every surrogate pair merged into one code point."""
    # "surrogatepass" lets the surrogate code units be encoded as-is;
    # decoding the resulting UTF-16 bytes then joins each pair.
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")