2
0
mirror of https://github.com/pyrogram/pyrogram synced 2025-08-30 05:48:14 +00:00

Merge branch 'develop' into asyncio

# Conflicts:
#	pyrogram/client/style/html.py
#	pyrogram/client/style/markdown.py
This commit is contained in:
Dan 2019-06-25 11:48:43 +02:00
commit 7cd145b0db
5 changed files with 229 additions and 194 deletions

2
.github/FUNDING.yml vendored
View File

@ -1,2 +1,2 @@
github: delivrance # github: delivrance
custom: https://docs.pyrogram.org/support-pyrogram custom: https://docs.pyrogram.org/support-pyrogram

View File

@ -9,4 +9,5 @@ RANDOM_ID_DUPLICATE Telegram is having internal problems. Please try again later
WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later
INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later
INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later
FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
MSGID_DECREASE_RETRY Telegram is having internal problems. Please try again later
1 id message
9 WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later
10 INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later
11 INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later
12 FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
13 MSGID_DECREASE_RETRY Telegram is having internal problems. Please try again later

View File

@ -16,127 +16,168 @@
# You should have received a copy of the GNU Lesser General Public License # You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>. # along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import html
import re import re
from collections import OrderedDict from collections import OrderedDict
from html.parser import HTMLParser
import pyrogram import pyrogram
from pyrogram.api.types import ( from pyrogram.api import types
MessageEntityBold as Bold,
MessageEntityItalic as Italic,
MessageEntityCode as Code,
MessageEntityTextUrl as Url,
MessageEntityPre as Pre,
MessageEntityUnderline as Underline,
MessageEntityStrike as Strike,
MessageEntityBlockquote as Blockquote,
MessageEntityMentionName as MentionInvalid,
InputMessageEntityMentionName as Mention,
)
from pyrogram.errors import PeerIdInvalid from pyrogram.errors import PeerIdInvalid
from . import utils from . import utils
class HTML: class Parser(HTMLParser):
HTML_RE = re.compile(r"<(\w+)(?: href=([\"'])([^<]+)\2)?>([^>]+)</\1>")
MENTION_RE = re.compile(r"tg://user\?id=(\d+)") MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
def __init__(self, client: "pyrogram.BaseClient"):
    """Create a parser with empty output buffers.

    Parameters:
        client: Stored on the instance as ``self.client``; this method
            does not use it otherwise.
    """
    super().__init__()

    # Output accumulated while feeding markup to the parser.
    self.text = ""
    self.entities = []

    # Entities whose closing tag has not been seen yet, grouped by tag name.
    self.tag_entities = {}

    self.client = client
def handle_starttag(self, tag, attrs):
    """Open a message entity for a supported opening tag.

    The new entity starts at the current end of ``self.text`` with a
    length of 0 and is pushed onto the per-tag stack in
    ``self.tag_entities``. Tags that are not listed below are ignored.

    Parameters:
        tag: Tag name as reported by ``HTMLParser``.
        attrs: Iterable of ``(name, value)`` attribute pairs.
    """
    attrs = dict(attrs)
    extra = {}

    if tag in ("b", "strong"):
        entity = types.MessageEntityBold
    elif tag in ("i", "em"):
        entity = types.MessageEntityItalic
    elif tag == "u":
        entity = types.MessageEntityUnderline
    elif tag in ("s", "del", "strike"):
        entity = types.MessageEntityStrike
    elif tag == "blockquote":
        entity = types.MessageEntityBlockquote
    elif tag == "code":
        entity = types.MessageEntityCode
    elif tag == "pre":
        entity = types.MessageEntityPre
        extra["language"] = ""
    elif tag == "a":
        # HTMLParser reports a value-less attribute (``<a href>``) as None,
        # so a plain ``.get("href", "")`` default is not enough: fall back
        # to "" explicitly to keep the regex match from raising TypeError.
        url = attrs.get("href") or ""
        mention = Parser.MENTION_RE.match(url)

        if mention:
            entity = types.InputMessageEntityMentionName
            extra["user_id"] = int(mention.group(1))
        else:
            entity = types.MessageEntityTextUrl
            extra["url"] = url
    else:
        # Unsupported tag: no entity is opened for it.
        return

    self.tag_entities.setdefault(tag, []).append(
        entity(offset=len(self.text), length=0, **extra)
    )
def handle_data(self, data):
    """Append a run of text and stretch every open entity over it.

    Parameters:
        data: Text between tags, as delivered by ``HTMLParser``.
    """
    # No html.unescape() here: __init__ calls HTMLParser.__init__() with its
    # defaults, and with convert_charrefs=True (the default since Python 3.5)
    # character references are already decoded before handle_data() runs.
    # Decoding a second time would turn the literal text "&amp;lt;" into "<"
    # instead of "&lt;".
    added = len(data)

    for open_entities in self.tag_entities.values():
        for entity in open_entities:
            entity.length += added

    self.text += data
def handle_endtag(self, tag):
    """Close the most recently opened entity for ``tag``.

    The entity is moved from ``self.tag_entities`` to ``self.entities``;
    the per-tag stack is dropped once it becomes empty.

    Raises:
        ValueError: If there is no open entity for ``tag``. The message
            carries the parser position, with the column shifted by one.
    """
    try:
        closed = self.tag_entities[tag].pop()
    except (KeyError, IndexError):
        row, col = self.getpos()
        raise ValueError(
            "Unmatched closing tag </{}> at line {}:{}".format(tag, row, col + 1)
        )

    self.entities.append(closed)

    if not self.tag_entities[tag]:
        del self.tag_entities[tag]
def error(self, message):
    """Ignore the reported parser error; nothing is raised or recorded."""
    return None
class HTML:
def __init__(self, client: "pyrogram.BaseClient" = None): def __init__(self, client: "pyrogram.BaseClient" = None):
self.client = client self.client = client
async def parse(self, message: str): async def parse(self, text: str):
message = utils.add_surrogates(str(message or "")) text = utils.add_surrogates(str(text or "").strip())
parser = Parser(self.client)
parser.feed(text)
parser.close()
if parser.tag_entities:
unclosed_tags = []
for tag, entities in parser.tag_entities.items():
unclosed_tags.append("<{}> (x{})".format(tag, len(entities)))
raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags)))
entities = [] entities = []
offset = 0
for match in self.HTML_RE.finditer(message): for entity in parser.entities:
start = match.start() - offset if isinstance(entity, types.InputMessageEntityMentionName):
style, url, body = match.group(1, 3, 4) try:
entity.user_id = await self.client.resolve_peer(entity.user_id)
if url: except PeerIdInvalid:
mention = self.MENTION_RE.match(url)
if mention:
user_id = int(mention.group(1))
try:
input_user = await self.client.resolve_peer(user_id)
except PeerIdInvalid:
input_user = None
entity = (
Mention(offset=start, length=len(body), user_id=input_user)
if input_user else MentionInvalid(offset=start, length=len(body), user_id=user_id)
)
else:
entity = Url(offset=start, length=len(body), url=url)
else:
if style == "b" or style == "strong":
entity = Bold(offset=start, length=len(body))
elif style == "i" or style == "em":
entity = Italic(offset=start, length=len(body))
elif style == "code":
entity = Code(offset=start, length=len(body))
elif style == "pre":
entity = Pre(offset=start, length=len(body), language="")
elif style == "u":
entity = Underline(offset=start, length=len(body))
elif style in ["strike", "s", "del"]:
entity = Strike(offset=start, length=len(body))
elif style == "blockquote":
entity = Blockquote(offset=start, length=len(body))
else:
continue continue
entities.append(entity) entities.append(entity)
message = message.replace(match.group(), body)
offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0)
# TODO: OrderedDict to be removed in Python3.6 # TODO: OrderedDict to be removed in Python 3.6
return OrderedDict([ return OrderedDict([
("message", utils.remove_surrogates(message)), ("message", utils.remove_surrogates(parser.text)),
("entities", entities) ("entities", entities)
]) ])
def unparse(self, message: str, entities: list): @staticmethod
message = utils.add_surrogates(message).strip() def unparse(text: str, entities: list):
offset = 0 text = utils.add_surrogates(text)
copy = text
for entity in entities: for entity in entities:
start = entity.offset + offset start = entity.offset
end = start + entity.length
type = entity.type type = entity.type
url = entity.url url = entity.url
user = entity.user user = entity.user
sub = message[start: start + entity.length]
sub = copy[start:end]
if type == "bold": if type == "bold":
style = "b" style = "b"
elif type == "italic": elif type == "italic":
style = "i" style = "i"
elif type == "code":
style = "code"
elif type == "pre":
style = "pre"
elif type == "underline": elif type == "underline":
style = "u" style = "u"
elif type == "strike": elif type == "strike":
style = "s" style = "s"
elif type == "code":
style = "code"
elif type == "pre":
style = "pre"
elif type == "blockquote": elif type == "blockquote":
style = "blockquote" style = "blockquote"
elif type == "text_link": elif type == "text_link":
offset += 15 + len(url) text = text[:start] + text[start:].replace(sub, '<a href="{}">{}</a>'.format(url, sub), 1)
message = message[:start] + message[start:].replace(
sub, "<a href=\"{}\">{}</a>".format(url, sub), 1)
continue continue
elif type == "text_mention": elif type == "text_mention":
offset += 28 + len(str(user.id)) text = text[:start] + text[start:].replace(
message = message[:start] + message[start:].replace( sub, '<a href="tg://user?id={}">{}</a>'.format(user.id, sub), 1)
sub, "<a href=\"tg://user?id={}\">{}</a>".format(user.id, sub), 1)
continue continue
else: else:
continue continue
offset += len(style) * 2 + 5 text = text[:start] + text[start:].replace(sub, "<{0}>{1}</{0}>".format(style, sub), 1)
message = message[:start] + message[start:].replace(
sub, "<{0}>{1}</{0}>".format(style, sub), 1)
return utils.remove_surrogates(message) return utils.remove_surrogates(text)

View File

@ -16,147 +16,142 @@
# You should have received a copy of the GNU Lesser General Public License # You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>. # along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import html
import re import re
from collections import OrderedDict
import pyrogram import pyrogram
from pyrogram.api.types import (
MessageEntityBold as Bold,
MessageEntityItalic as Italic,
MessageEntityCode as Code,
MessageEntityTextUrl as Url,
MessageEntityPre as Pre,
MessageEntityUnderline as Underline,
MessageEntityStrike as Strike,
MessageEntityMentionName as MentionInvalid,
InputMessageEntityMentionName as Mention
)
from pyrogram.errors import PeerIdInvalid
from . import utils from . import utils
from .html import HTML
BOLD_DELIM = "**"
ITALIC_DELIM = "__"
UNDERLINE_DELIM = "--"
STRIKE_DELIM = "~~"
CODE_DELIM = "`"
PRE_DELIM = "```"
class Markdown: class Markdown:
BOLD_DELIMITER = "**" MARKDOWN_RE = re.compile(r"({d})".format(
ITALIC_DELIMITER = "__"
UNDERLINE_DELIMITER = "--"
STRIKE_DELIMITER = "~~"
CODE_DELIMITER = "`"
PRE_DELIMITER = "```"
MARKDOWN_RE = re.compile(r"({d})([\w\W]*?)\1|\[([^[]+?)\]\(([^(]+?)\)".format(
d="|".join( d="|".join(
["".join(i) for i in [ ["".join(i) for i in [
["\{}".format(j) for j in i] [r"\{}".format(j) for j in i]
for i in [ for i in [
PRE_DELIMITER, PRE_DELIM,
CODE_DELIMITER, CODE_DELIM,
STRIKE_DELIMITER, STRIKE_DELIM,
UNDERLINE_DELIMITER, UNDERLINE_DELIM,
ITALIC_DELIMITER, ITALIC_DELIM,
BOLD_DELIMITER BOLD_DELIM
] ]
]] ]]
) )))
))
MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
def __init__(self, client: "pyrogram.BaseClient" = None): URL_RE = re.compile(r"\[([^[]+)]\(([^(]+)\)")
self.client = client
OPENING_TAG = "<{}>"
CLOSING_TAG = "</{}>"
URL_MARKUP = '<a href="{}">{}</a>'
FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
def __init__(self, client: "pyrogram.BaseClient"):
self.html = HTML(client)
async def parse(self, text: str):
text = html.escape(text)
async def parse(self, message: str):
message = utils.add_surrogates(str(message or "")).strip()
entities = []
offset = 0 offset = 0
delims = set()
for match in self.MARKDOWN_RE.finditer(message): for i, match in enumerate(re.finditer(Markdown.MARKDOWN_RE, text)):
start = match.start() - offset start, stop = match.span()
style, body, text, url = match.groups() delim = match.group(1)
if url: if delim == BOLD_DELIM:
mention = self.MENTION_RE.match(url) tag = "b"
elif delim == ITALIC_DELIM:
if mention: tag = "i"
user_id = int(mention.group(1)) elif delim == UNDERLINE_DELIM:
tag = "u"
try: elif delim == STRIKE_DELIM:
input_user = await self.client.resolve_peer(user_id) tag = "s"
except PeerIdInvalid: elif delim == CODE_DELIM:
input_user = None tag = "code"
elif delim == PRE_DELIM:
entity = ( tag = "pre"
Mention(offset=start, length=len(text), user_id=input_user)
if input_user else MentionInvalid(offset=start, length=len(text), user_id=user_id)
)
else:
entity = Url(offset=start, length=len(text), url=url)
body = text
offset += len(url) + 4
else: else:
if style == self.BOLD_DELIMITER: continue
entity = Bold(offset=start, length=len(body))
elif style == self.ITALIC_DELIMITER:
entity = Italic(offset=start, length=len(body))
elif style == self.UNDERLINE_DELIMITER:
entity = Underline(offset=start, length=len(body))
elif style == self.STRIKE_DELIMITER:
entity = Strike(offset=start, length=len(body))
elif style == self.CODE_DELIMITER:
entity = Code(offset=start, length=len(body))
elif style == self.PRE_DELIMITER:
entity = Pre(offset=start, length=len(body), language="")
else:
continue
offset += len(style) * 2 if delim not in Markdown.FIXED_WIDTH_DELIMS and any(x in delims for x in Markdown.FIXED_WIDTH_DELIMS):
continue
entities.append(entity) if delim not in delims:
message = message.replace(match.group(), body) delims.add(delim)
tag = Markdown.OPENING_TAG.format(tag)
else:
delims.remove(delim)
tag = Markdown.CLOSING_TAG.format(tag)
# TODO: OrderedDict to be removed in Python3.6 text = text[:start + offset] + tag + text[stop + offset:]
return OrderedDict([
("message", utils.remove_surrogates(message)), offset += len(tag) - len(delim)
("entities", entities)
])
def unparse(self, message: str, entities: list):
message = utils.add_surrogates(message).strip()
offset = 0 offset = 0
for match in re.finditer(Markdown.URL_RE, text):
start, stop = match.span()
full = match.group(0)
body, url = match.groups()
replace = Markdown.URL_MARKUP.format(url, body)
text = text[:start + offset] + replace + text[stop + offset:]
offset += len(replace) - len(full)
return await self.html.parse(text)
@staticmethod
def unparse(text: str, entities: list):
text = utils.add_surrogates(text)
copy = text
for entity in entities: for entity in entities:
start = entity.offset + offset start = entity.offset
end = start + entity.length
type = entity.type type = entity.type
url = entity.url url = entity.url
user = entity.user user = entity.user
sub = message[start: start + entity.length]
sub = copy[start:end]
if type == "bold": if type == "bold":
style = self.BOLD_DELIMITER style = BOLD_DELIM
elif type == "italic": elif type == "italic":
style = self.ITALIC_DELIMITER style = ITALIC_DELIM
elif type == "underline": elif type == "underline":
style = self.UNDERLINE_DELIMITER style = UNDERLINE_DELIM
elif type == "strike": elif type == "strike":
style = self.STRIKE_DELIMITER style = STRIKE_DELIM
elif type == "code": elif type == "code":
style = self.CODE_DELIMITER style = CODE_DELIM
elif type == "pre": elif type == "pre":
style = self.PRE_DELIMITER style = PRE_DELIM
# TODO: Blockquote for MD
# elif type == "blockquote":
# style = ...
elif type == "text_link": elif type == "text_link":
offset += 4 + len(url) text = text[:start] + text[start:].replace(sub, '[{1}]({0})'.format(url, sub), 1)
message = message[:start] + message[start:].replace(
sub, "[{}]({})".format(sub, url), 1)
continue continue
elif type == "text_mention": elif type == "text_mention":
offset += 17 + len(str(user.id)) text = text[:start] + text[start:].replace(
message = message[:start] + message[start:].replace( sub, '[{1}](tg://user?id={0})'.format(user.id, sub), 1)
sub, "[{}](tg://user?id={})".format(sub, user.id), 1)
continue continue
else: else:
continue continue
offset += len(style) * 2 text = text[:start] + text[start:].replace(sub, "{0}{1}{0}".format(style, sub), 1)
message = message[:start] + message[start:].replace(
sub, "{0}{1}{0}".format(style, sub), 1)
return utils.remove_surrogates(message) return utils.remove_surrogates(text)

View File

@ -31,32 +31,30 @@ from ..object import Object
from ..update import Update from ..update import Update
from ..user_and_chats.chat import Chat from ..user_and_chats.chat import Chat
from ..user_and_chats.user import User from ..user_and_chats.user import User
from ...style import utils, Markdown, HTML
class Str(str): class Str(str):
def __init__(self, *args): def __init__(self, *args):
super().__init__() super().__init__()
self._client = None self.entities = None
self._entities = None
def init(self, client, entities): def init(self, entities):
self._client = client self.entities = entities
self._entities = entities
return self return self
@property
def text(self):
return self
@property @property
def markdown(self): def markdown(self):
return self._client.markdown.unparse(self, self._entities) return Markdown.unparse(self, self.entities)
@property @property
def html(self): def html(self):
return self._client.html.unparse(self, self._entities) return HTML.unparse(self, self.entities)
def __getitem__(self, item):
return utils.remove_surrogates(utils.add_surrogates(self)[item])
class Message(Object, Update): class Message(Object, Update):
@ -486,7 +484,7 @@ class Message(Object, Update):
if isinstance(message, types.Message): if isinstance(message, types.Message):
entities = [MessageEntity._parse(client, entity, users) for entity in message.entities] entities = [MessageEntity._parse(client, entity, users) for entity in message.entities]
entities = list(filter(lambda x: x is not None, entities)) entities = pyrogram.List(filter(lambda x: x is not None, entities))
forward_from = None forward_from = None
forward_sender_name = None forward_sender_name = None
@ -603,8 +601,8 @@ class Message(Object, Update):
date=message.date, date=message.date,
chat=Chat._parse(client, message, users, chats), chat=Chat._parse(client, message, users, chats),
from_user=User._parse(client, users.get(message.from_id, None)), from_user=User._parse(client, users.get(message.from_id, None)),
text=Str(message.message).init(client, entities) or None if media is None else None, text=Str(message.message).init(entities) or None if media is None else None,
caption=Str(message.message).init(client, entities) or None if media is not None else None, caption=Str(message.message).init(entities) or None if media is not None else None,
entities=entities or None if media is None else None, entities=entities or None if media is None else None,
caption_entities=entities or None if media is not None else None, caption_entities=entities or None if media is not None else None,
author_signature=message.post_author, author_signature=message.post_author,