mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-29 21:38:04 +00:00
Merge branch 'develop' into asyncio
# Conflicts: # pyrogram/client/style/html.py # pyrogram/client/style/markdown.py
This commit is contained in:
commit
7cd145b0db
2
.github/FUNDING.yml
vendored
2
.github/FUNDING.yml
vendored
@ -1,2 +1,2 @@
|
||||
github: delivrance
|
||||
# github: delivrance
|
||||
custom: https://docs.pyrogram.org/support-pyrogram
|
||||
|
@ -9,4 +9,5 @@ RANDOM_ID_DUPLICATE Telegram is having internal problems. Please try again later
|
||||
WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later
|
||||
INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later
|
||||
INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later
|
||||
FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
|
||||
FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
|
||||
MSGID_DECREASE_RETRY Telegram is having internal problems. Please try again later
|
|
@ -16,127 +16,168 @@
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import html
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from html.parser import HTMLParser
|
||||
|
||||
import pyrogram
|
||||
from pyrogram.api.types import (
|
||||
MessageEntityBold as Bold,
|
||||
MessageEntityItalic as Italic,
|
||||
MessageEntityCode as Code,
|
||||
MessageEntityTextUrl as Url,
|
||||
MessageEntityPre as Pre,
|
||||
MessageEntityUnderline as Underline,
|
||||
MessageEntityStrike as Strike,
|
||||
MessageEntityBlockquote as Blockquote,
|
||||
MessageEntityMentionName as MentionInvalid,
|
||||
InputMessageEntityMentionName as Mention,
|
||||
)
|
||||
from pyrogram.api import types
|
||||
from pyrogram.errors import PeerIdInvalid
|
||||
from . import utils
|
||||
|
||||
|
||||
class HTML:
|
||||
HTML_RE = re.compile(r"<(\w+)(?: href=([\"'])([^<]+)\2)?>([^>]+)</\1>")
|
||||
class Parser(HTMLParser):
    """Streaming HTML parser that collects plain text and message entities.

    Feed markup with ``feed()`` and finish with ``close()``. Afterwards:

    * ``text`` holds the accumulated character data with all tags removed;
    * ``entities`` holds one entity object per closed, supported tag, in the
      order the closing tags were seen;
    * ``tag_entities`` maps a tag name to the stack of entities opened for that
      tag and not yet closed. Anything left in it after ``close()`` is an
      unclosed tag.

    Supported tags: ``b``/``strong``, ``i``/``em``, ``u``, ``s``/``del``/
    ``strike``, ``blockquote``, ``code``, ``pre`` and ``a``.
    """

    # Matches Telegram user-mention links and captures the numeric user id.
    MENTION_RE = re.compile(r"tg://user\?id=(\d+)")

    def __init__(self, client: "pyrogram.BaseClient"):
        """Create an empty parser; ``client`` is only stored on the instance."""
        # handle_data() relies on the base class resolving character references
        # for us, so request that behaviour explicitly rather than by default.
        super().__init__(convert_charrefs=True)

        self.client = client

        self.text = ""
        self.entities = []
        self.tag_entities = {}

    def handle_starttag(self, tag, attrs):
        """Open an entity for a supported tag at the current text offset.

        Unsupported tags are ignored. The new entity starts with length 0 and
        grows as ``handle_data`` receives text while the tag is open.
        """
        attrs = dict(attrs)
        extra = {}

        if tag in ("b", "strong"):
            entity = types.MessageEntityBold
        elif tag in ("i", "em"):
            entity = types.MessageEntityItalic
        elif tag == "u":
            entity = types.MessageEntityUnderline
        elif tag in ("s", "del", "strike"):
            entity = types.MessageEntityStrike
        elif tag == "blockquote":
            entity = types.MessageEntityBlockquote
        elif tag == "code":
            entity = types.MessageEntityCode
        elif tag == "pre":
            entity = types.MessageEntityPre
            extra["language"] = ""
        elif tag == "a":
            # HTMLParser reports a valueless attribute (``<a href>``) as None;
            # treat that like a missing href instead of passing None to the
            # regex, which would raise TypeError.
            url = attrs.get("href") or ""

            mention = self.MENTION_RE.match(url)

            if mention:
                entity = types.InputMessageEntityMentionName
                extra["user_id"] = int(mention.group(1))
            else:
                entity = types.MessageEntityTextUrl
                extra["url"] = url
        else:
            return

        self.tag_entities.setdefault(tag, []).append(
            entity(offset=len(self.text), length=0, **extra)
        )

    def handle_data(self, data):
        """Append text and extend every currently open entity by its length."""
        # ``data`` is already unescaped by HTMLParser (convert_charrefs=True).
        # Unescaping it a second time would turn a literal ``&amp;lt;`` into
        # ``<`` instead of ``&lt;``, so the text is used as delivered.
        for entities in self.tag_entities.values():
            for entity in entities:
                entity.length += len(data)

        self.text += data

    def handle_endtag(self, tag):
        """Close the most recently opened entity for ``tag``.

        Raises:
            ValueError: if no entity is open for ``tag``. Tags ignored by
                ``handle_starttag`` are never registered, so their closing
                tags are reported this way as well.
        """
        try:
            self.entities.append(self.tag_entities[tag].pop())
        except (KeyError, IndexError):
            line, offset = self.getpos()
            offset += 1  # getpos() columns are 0-based; report them 1-based

            raise ValueError("Unmatched closing tag </{}> at line {}:{}".format(tag, line, offset))
        else:
            # Drop the now-empty stack so leftover keys always mean unclosed tags.
            if not self.tag_entities[tag]:
                self.tag_entities.pop(tag)

    def error(self, message):
        """Ignore parser errors reported through the ``error`` hook."""
        pass
|
||||
|
||||
|
||||
class HTML:
|
||||
def __init__(self, client: "pyrogram.BaseClient" = None):
|
||||
self.client = client
|
||||
|
||||
async def parse(self, message: str):
|
||||
message = utils.add_surrogates(str(message or ""))
|
||||
async def parse(self, text: str):
|
||||
text = utils.add_surrogates(str(text or "").strip())
|
||||
|
||||
parser = Parser(self.client)
|
||||
parser.feed(text)
|
||||
parser.close()
|
||||
|
||||
if parser.tag_entities:
|
||||
unclosed_tags = []
|
||||
|
||||
for tag, entities in parser.tag_entities.items():
|
||||
unclosed_tags.append("<{}> (x{})".format(tag, len(entities)))
|
||||
|
||||
raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags)))
|
||||
|
||||
entities = []
|
||||
offset = 0
|
||||
|
||||
for match in self.HTML_RE.finditer(message):
|
||||
start = match.start() - offset
|
||||
style, url, body = match.group(1, 3, 4)
|
||||
|
||||
if url:
|
||||
mention = self.MENTION_RE.match(url)
|
||||
|
||||
if mention:
|
||||
user_id = int(mention.group(1))
|
||||
|
||||
try:
|
||||
input_user = await self.client.resolve_peer(user_id)
|
||||
except PeerIdInvalid:
|
||||
input_user = None
|
||||
|
||||
entity = (
|
||||
Mention(offset=start, length=len(body), user_id=input_user)
|
||||
if input_user else MentionInvalid(offset=start, length=len(body), user_id=user_id)
|
||||
)
|
||||
else:
|
||||
entity = Url(offset=start, length=len(body), url=url)
|
||||
else:
|
||||
if style == "b" or style == "strong":
|
||||
entity = Bold(offset=start, length=len(body))
|
||||
elif style == "i" or style == "em":
|
||||
entity = Italic(offset=start, length=len(body))
|
||||
elif style == "code":
|
||||
entity = Code(offset=start, length=len(body))
|
||||
elif style == "pre":
|
||||
entity = Pre(offset=start, length=len(body), language="")
|
||||
elif style == "u":
|
||||
entity = Underline(offset=start, length=len(body))
|
||||
elif style in ["strike", "s", "del"]:
|
||||
entity = Strike(offset=start, length=len(body))
|
||||
elif style == "blockquote":
|
||||
entity = Blockquote(offset=start, length=len(body))
|
||||
else:
|
||||
for entity in parser.entities:
|
||||
if isinstance(entity, types.InputMessageEntityMentionName):
|
||||
try:
|
||||
entity.user_id = await self.client.resolve_peer(entity.user_id)
|
||||
except PeerIdInvalid:
|
||||
continue
|
||||
|
||||
entities.append(entity)
|
||||
message = message.replace(match.group(), body)
|
||||
offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0)
|
||||
|
||||
# TODO: OrderedDict to be removed in Python3.6
|
||||
# TODO: OrderedDict to be removed in Python 3.6
|
||||
return OrderedDict([
|
||||
("message", utils.remove_surrogates(message)),
|
||||
("message", utils.remove_surrogates(parser.text)),
|
||||
("entities", entities)
|
||||
])
|
||||
|
||||
def unparse(self, message: str, entities: list):
|
||||
message = utils.add_surrogates(message).strip()
|
||||
offset = 0
|
||||
@staticmethod
|
||||
def unparse(text: str, entities: list):
|
||||
text = utils.add_surrogates(text)
|
||||
copy = text
|
||||
|
||||
for entity in entities:
|
||||
start = entity.offset + offset
|
||||
start = entity.offset
|
||||
end = start + entity.length
|
||||
|
||||
type = entity.type
|
||||
|
||||
url = entity.url
|
||||
user = entity.user
|
||||
sub = message[start: start + entity.length]
|
||||
|
||||
sub = copy[start:end]
|
||||
|
||||
if type == "bold":
|
||||
style = "b"
|
||||
elif type == "italic":
|
||||
style = "i"
|
||||
elif type == "code":
|
||||
style = "code"
|
||||
elif type == "pre":
|
||||
style = "pre"
|
||||
elif type == "underline":
|
||||
style = "u"
|
||||
elif type == "strike":
|
||||
style = "s"
|
||||
elif type == "code":
|
||||
style = "code"
|
||||
elif type == "pre":
|
||||
style = "pre"
|
||||
elif type == "blockquote":
|
||||
style = "blockquote"
|
||||
elif type == "text_link":
|
||||
offset += 15 + len(url)
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "<a href=\"{}\">{}</a>".format(url, sub), 1)
|
||||
text = text[:start] + text[start:].replace(sub, '<a href="{}">{}</a>'.format(url, sub), 1)
|
||||
continue
|
||||
elif type == "text_mention":
|
||||
offset += 28 + len(str(user.id))
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "<a href=\"tg://user?id={}\">{}</a>".format(user.id, sub), 1)
|
||||
text = text[:start] + text[start:].replace(
|
||||
sub, '<a href="tg://user?id={}">{}</a>'.format(user.id, sub), 1)
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
offset += len(style) * 2 + 5
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "<{0}>{1}</{0}>".format(style, sub), 1)
|
||||
text = text[:start] + text[start:].replace(sub, "<{0}>{1}</{0}>".format(style, sub), 1)
|
||||
|
||||
return utils.remove_surrogates(message)
|
||||
return utils.remove_surrogates(text)
|
||||
|
@ -16,147 +16,142 @@
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import html
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
|
||||
import pyrogram
|
||||
from pyrogram.api.types import (
|
||||
MessageEntityBold as Bold,
|
||||
MessageEntityItalic as Italic,
|
||||
MessageEntityCode as Code,
|
||||
MessageEntityTextUrl as Url,
|
||||
MessageEntityPre as Pre,
|
||||
MessageEntityUnderline as Underline,
|
||||
MessageEntityStrike as Strike,
|
||||
MessageEntityMentionName as MentionInvalid,
|
||||
InputMessageEntityMentionName as Mention
|
||||
)
|
||||
from pyrogram.errors import PeerIdInvalid
|
||||
from . import utils
|
||||
from .html import HTML
|
||||
|
||||
BOLD_DELIM = "**"
|
||||
ITALIC_DELIM = "__"
|
||||
UNDERLINE_DELIM = "--"
|
||||
STRIKE_DELIM = "~~"
|
||||
CODE_DELIM = "`"
|
||||
PRE_DELIM = "```"
|
||||
|
||||
|
||||
class Markdown:
|
||||
BOLD_DELIMITER = "**"
|
||||
ITALIC_DELIMITER = "__"
|
||||
UNDERLINE_DELIMITER = "--"
|
||||
STRIKE_DELIMITER = "~~"
|
||||
CODE_DELIMITER = "`"
|
||||
PRE_DELIMITER = "```"
|
||||
|
||||
MARKDOWN_RE = re.compile(r"({d})([\w\W]*?)\1|\[([^[]+?)\]\(([^(]+?)\)".format(
|
||||
MARKDOWN_RE = re.compile(r"({d})".format(
|
||||
d="|".join(
|
||||
["".join(i) for i in [
|
||||
["\{}".format(j) for j in i]
|
||||
[r"\{}".format(j) for j in i]
|
||||
for i in [
|
||||
PRE_DELIMITER,
|
||||
CODE_DELIMITER,
|
||||
STRIKE_DELIMITER,
|
||||
UNDERLINE_DELIMITER,
|
||||
ITALIC_DELIMITER,
|
||||
BOLD_DELIMITER
|
||||
PRE_DELIM,
|
||||
CODE_DELIM,
|
||||
STRIKE_DELIM,
|
||||
UNDERLINE_DELIM,
|
||||
ITALIC_DELIM,
|
||||
BOLD_DELIM
|
||||
]
|
||||
]]
|
||||
)
|
||||
))
|
||||
MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
|
||||
)))
|
||||
|
||||
def __init__(self, client: "pyrogram.BaseClient" = None):
|
||||
self.client = client
|
||||
URL_RE = re.compile(r"\[([^[]+)]\(([^(]+)\)")
|
||||
|
||||
OPENING_TAG = "<{}>"
|
||||
CLOSING_TAG = "</{}>"
|
||||
URL_MARKUP = '<a href="{}">{}</a>'
|
||||
FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
|
||||
|
||||
def __init__(self, client: "pyrogram.BaseClient"):
|
||||
self.html = HTML(client)
|
||||
|
||||
async def parse(self, text: str):
|
||||
text = html.escape(text)
|
||||
|
||||
async def parse(self, message: str):
|
||||
message = utils.add_surrogates(str(message or "")).strip()
|
||||
entities = []
|
||||
offset = 0
|
||||
delims = set()
|
||||
|
||||
for match in self.MARKDOWN_RE.finditer(message):
|
||||
start = match.start() - offset
|
||||
style, body, text, url = match.groups()
|
||||
for i, match in enumerate(re.finditer(Markdown.MARKDOWN_RE, text)):
|
||||
start, stop = match.span()
|
||||
delim = match.group(1)
|
||||
|
||||
if url:
|
||||
mention = self.MENTION_RE.match(url)
|
||||
|
||||
if mention:
|
||||
user_id = int(mention.group(1))
|
||||
|
||||
try:
|
||||
input_user = await self.client.resolve_peer(user_id)
|
||||
except PeerIdInvalid:
|
||||
input_user = None
|
||||
|
||||
entity = (
|
||||
Mention(offset=start, length=len(text), user_id=input_user)
|
||||
if input_user else MentionInvalid(offset=start, length=len(text), user_id=user_id)
|
||||
)
|
||||
else:
|
||||
entity = Url(offset=start, length=len(text), url=url)
|
||||
|
||||
body = text
|
||||
offset += len(url) + 4
|
||||
if delim == BOLD_DELIM:
|
||||
tag = "b"
|
||||
elif delim == ITALIC_DELIM:
|
||||
tag = "i"
|
||||
elif delim == UNDERLINE_DELIM:
|
||||
tag = "u"
|
||||
elif delim == STRIKE_DELIM:
|
||||
tag = "s"
|
||||
elif delim == CODE_DELIM:
|
||||
tag = "code"
|
||||
elif delim == PRE_DELIM:
|
||||
tag = "pre"
|
||||
else:
|
||||
if style == self.BOLD_DELIMITER:
|
||||
entity = Bold(offset=start, length=len(body))
|
||||
elif style == self.ITALIC_DELIMITER:
|
||||
entity = Italic(offset=start, length=len(body))
|
||||
elif style == self.UNDERLINE_DELIMITER:
|
||||
entity = Underline(offset=start, length=len(body))
|
||||
elif style == self.STRIKE_DELIMITER:
|
||||
entity = Strike(offset=start, length=len(body))
|
||||
elif style == self.CODE_DELIMITER:
|
||||
entity = Code(offset=start, length=len(body))
|
||||
elif style == self.PRE_DELIMITER:
|
||||
entity = Pre(offset=start, length=len(body), language="")
|
||||
else:
|
||||
continue
|
||||
continue
|
||||
|
||||
offset += len(style) * 2
|
||||
if delim not in Markdown.FIXED_WIDTH_DELIMS and any(x in delims for x in Markdown.FIXED_WIDTH_DELIMS):
|
||||
continue
|
||||
|
||||
entities.append(entity)
|
||||
message = message.replace(match.group(), body)
|
||||
if delim not in delims:
|
||||
delims.add(delim)
|
||||
tag = Markdown.OPENING_TAG.format(tag)
|
||||
else:
|
||||
delims.remove(delim)
|
||||
tag = Markdown.CLOSING_TAG.format(tag)
|
||||
|
||||
# TODO: OrderedDict to be removed in Python3.6
|
||||
return OrderedDict([
|
||||
("message", utils.remove_surrogates(message)),
|
||||
("entities", entities)
|
||||
])
|
||||
text = text[:start + offset] + tag + text[stop + offset:]
|
||||
|
||||
offset += len(tag) - len(delim)
|
||||
|
||||
def unparse(self, message: str, entities: list):
|
||||
message = utils.add_surrogates(message).strip()
|
||||
offset = 0
|
||||
|
||||
for match in re.finditer(Markdown.URL_RE, text):
|
||||
start, stop = match.span()
|
||||
full = match.group(0)
|
||||
|
||||
body, url = match.groups()
|
||||
replace = Markdown.URL_MARKUP.format(url, body)
|
||||
|
||||
text = text[:start + offset] + replace + text[stop + offset:]
|
||||
|
||||
offset += len(replace) - len(full)
|
||||
|
||||
return await self.html.parse(text)
|
||||
|
||||
@staticmethod
|
||||
def unparse(text: str, entities: list):
|
||||
text = utils.add_surrogates(text)
|
||||
copy = text
|
||||
|
||||
for entity in entities:
|
||||
start = entity.offset + offset
|
||||
start = entity.offset
|
||||
end = start + entity.length
|
||||
|
||||
type = entity.type
|
||||
|
||||
url = entity.url
|
||||
user = entity.user
|
||||
sub = message[start: start + entity.length]
|
||||
|
||||
sub = copy[start:end]
|
||||
|
||||
if type == "bold":
|
||||
style = self.BOLD_DELIMITER
|
||||
style = BOLD_DELIM
|
||||
elif type == "italic":
|
||||
style = self.ITALIC_DELIMITER
|
||||
style = ITALIC_DELIM
|
||||
elif type == "underline":
|
||||
style = self.UNDERLINE_DELIMITER
|
||||
style = UNDERLINE_DELIM
|
||||
elif type == "strike":
|
||||
style = self.STRIKE_DELIMITER
|
||||
style = STRIKE_DELIM
|
||||
elif type == "code":
|
||||
style = self.CODE_DELIMITER
|
||||
style = CODE_DELIM
|
||||
elif type == "pre":
|
||||
style = self.PRE_DELIMITER
|
||||
style = PRE_DELIM
|
||||
# TODO: Blockquote for MD
|
||||
# elif type == "blockquote":
|
||||
# style = ...
|
||||
elif type == "text_link":
|
||||
offset += 4 + len(url)
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "[{}]({})".format(sub, url), 1)
|
||||
text = text[:start] + text[start:].replace(sub, '[{1}]({0})'.format(url, sub), 1)
|
||||
continue
|
||||
elif type == "text_mention":
|
||||
offset += 17 + len(str(user.id))
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "[{}](tg://user?id={})".format(sub, user.id), 1)
|
||||
text = text[:start] + text[start:].replace(
|
||||
sub, '[{1}](tg://user?id={0})'.format(user.id, sub), 1)
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
|
||||
offset += len(style) * 2
|
||||
message = message[:start] + message[start:].replace(
|
||||
sub, "{0}{1}{0}".format(style, sub), 1)
|
||||
text = text[:start] + text[start:].replace(sub, "{0}{1}{0}".format(style, sub), 1)
|
||||
|
||||
return utils.remove_surrogates(message)
|
||||
return utils.remove_surrogates(text)
|
||||
|
@ -31,32 +31,30 @@ from ..object import Object
|
||||
from ..update import Update
|
||||
from ..user_and_chats.chat import Chat
|
||||
from ..user_and_chats.user import User
|
||||
from ...style import utils, Markdown, HTML
|
||||
|
||||
|
||||
class Str(str):
|
||||
def __init__(self, *args):
|
||||
super().__init__()
|
||||
|
||||
self._client = None
|
||||
self._entities = None
|
||||
self.entities = None
|
||||
|
||||
def init(self, client, entities):
|
||||
self._client = client
|
||||
self._entities = entities
|
||||
def init(self, entities):
|
||||
self.entities = entities
|
||||
|
||||
return self
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return self
|
||||
|
||||
@property
|
||||
def markdown(self):
|
||||
return self._client.markdown.unparse(self, self._entities)
|
||||
return Markdown.unparse(self, self.entities)
|
||||
|
||||
@property
|
||||
def html(self):
|
||||
return self._client.html.unparse(self, self._entities)
|
||||
return HTML.unparse(self, self.entities)
|
||||
|
||||
def __getitem__(self, item):
|
||||
return utils.remove_surrogates(utils.add_surrogates(self)[item])
|
||||
|
||||
|
||||
class Message(Object, Update):
|
||||
@ -486,7 +484,7 @@ class Message(Object, Update):
|
||||
|
||||
if isinstance(message, types.Message):
|
||||
entities = [MessageEntity._parse(client, entity, users) for entity in message.entities]
|
||||
entities = list(filter(lambda x: x is not None, entities))
|
||||
entities = pyrogram.List(filter(lambda x: x is not None, entities))
|
||||
|
||||
forward_from = None
|
||||
forward_sender_name = None
|
||||
@ -603,8 +601,8 @@ class Message(Object, Update):
|
||||
date=message.date,
|
||||
chat=Chat._parse(client, message, users, chats),
|
||||
from_user=User._parse(client, users.get(message.from_id, None)),
|
||||
text=Str(message.message).init(client, entities) or None if media is None else None,
|
||||
caption=Str(message.message).init(client, entities) or None if media is not None else None,
|
||||
text=Str(message.message).init(entities) or None if media is None else None,
|
||||
caption=Str(message.message).init(entities) or None if media is not None else None,
|
||||
entities=entities or None if media is None else None,
|
||||
caption_entities=entities or None if media is not None else None,
|
||||
author_signature=message.post_author,
|
||||
|
Loading…
x
Reference in New Issue
Block a user