diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
index f34f615a..3437aeae 100644
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,2 +1,2 @@
-github: delivrance
+# github: delivrance
custom: https://docs.pyrogram.org/support-pyrogram
diff --git a/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv b/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv
index 446fe908..4bbea8ea 100644
--- a/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv
+++ b/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv
@@ -9,4 +9,5 @@ RANDOM_ID_DUPLICATE Telegram is having internal problems. Please try again later
WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later
INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later
INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later
-FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
\ No newline at end of file
+FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later
+MSGID_DECREASE_RETRY Telegram is having internal problems. Please try again later
\ No newline at end of file
diff --git a/pyrogram/client/style/html.py b/pyrogram/client/style/html.py
index 4d3e3be2..31d7f797 100644
--- a/pyrogram/client/style/html.py
+++ b/pyrogram/client/style/html.py
@@ -16,127 +16,168 @@
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
+import html
import re
from collections import OrderedDict
+from html.parser import HTMLParser
import pyrogram
-from pyrogram.api.types import (
- MessageEntityBold as Bold,
- MessageEntityItalic as Italic,
- MessageEntityCode as Code,
- MessageEntityTextUrl as Url,
- MessageEntityPre as Pre,
- MessageEntityUnderline as Underline,
- MessageEntityStrike as Strike,
- MessageEntityBlockquote as Blockquote,
- MessageEntityMentionName as MentionInvalid,
- InputMessageEntityMentionName as Mention,
-)
+from pyrogram.api import types
from pyrogram.errors import PeerIdInvalid
from . import utils
-class HTML:
- HTML_RE = re.compile(r"<(\w+)(?: href=([\"'])([^<]+)\2)?>([^>]+)\1>")
+class Parser(HTMLParser):
+    """HTML parser that collects tag-free text together with Telegram message entities.
+
+    After ``feed()``/``close()``:
+      * ``text`` holds the text with all markup removed,
+      * ``entities`` holds one entity per closed supported tag, in closing order,
+      * ``tag_entities`` maps tag names to the entities that are still open.
+    """
+
MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
+    def __init__(self, client: "pyrogram.BaseClient"):
+        # Ask HTMLParser explicitly to decode character references, so that
+        # handle_data() always receives final text.
+        super().__init__(convert_charrefs=True)
+
+        self.client = client
+
+        self.text = ""  # text seen so far, markup stripped
+        self.entities = []  # entities whose closing tag has been seen
+        self.tag_entities = {}  # tag name -> stack of entities still open
+
+    def handle_starttag(self, tag, attrs):
+        """Open a zero-length entity at the current text position for a supported tag.
+
+        Unsupported tags are ignored. An ``<a>`` tag whose ``href`` matches
+        ``MENTION_RE`` becomes a mention carrying the numeric user id, any
+        other ``<a>`` becomes a text URL.
+        """
+        attrs = dict(attrs)
+        extra = {}
+
+        if tag in ["b", "strong"]:
+            entity = types.MessageEntityBold
+        elif tag in ["i", "em"]:
+            entity = types.MessageEntityItalic
+        elif tag == "u":
+            entity = types.MessageEntityUnderline
+        elif tag in ["s", "del", "strike"]:
+            entity = types.MessageEntityStrike
+        elif tag == "blockquote":
+            entity = types.MessageEntityBlockquote
+        elif tag == "code":
+            entity = types.MessageEntityCode
+        elif tag == "pre":
+            entity = types.MessageEntityPre
+            extra["language"] = ""
+        elif tag == "a":
+            url = attrs.get("href", "")
+
+            mention = Parser.MENTION_RE.match(url)
+
+            if mention:
+                entity = types.InputMessageEntityMentionName
+                extra["user_id"] = int(mention.group(1))
+            else:
+                entity = types.MessageEntityTextUrl
+                extra["url"] = url
+        else:
+            return
+
+        # Same-named tags may nest, hence a stack per tag name.
+        self.tag_entities.setdefault(tag, []).append(entity(offset=len(self.text), length=0, **extra))
+
+    def handle_data(self, data):
+        """Append a chunk of text and grow every entity that is still open."""
+        # ``data`` arrives already unescaped (convert_charrefs=True). Unescaping it
+        # a second time would turn the literal "&amp;lt;" into "<" instead of "&lt;".
+        for entities in self.tag_entities.values():
+            for entity in entities:
+                entity.length += len(data)
+
+        self.text += data
+
+    def handle_endtag(self, tag):
+        """Close the innermost open entity of ``tag`` and move it to ``entities``.
+
+        Raises:
+            ValueError: if no entity is open for ``tag``. This includes closing
+                tags of unsupported elements, whose opening tag is never recorded.
+        """
+        try:
+            self.entities.append(self.tag_entities[tag].pop())
+        except (KeyError, IndexError):
+            line, offset = self.getpos()
+            offset += 1
+
+            raise ValueError("Unmatched closing tag </{}> at line {}:{}".format(tag, line, offset))
+        else:
+            # Drop the empty stack so that "tag_entities is empty" means "nothing left open".
+            if not self.tag_entities[tag]:
+                self.tag_entities.pop(tag)
+
+    def error(self, message):
+        """Do nothing when the base parser reports an error through this hook."""
+        pass
+
+
+class HTML:
def __init__(self, client: "pyrogram.BaseClient" = None):
self.client = client
- async def parse(self, message: str):
- message = utils.add_surrogates(str(message or ""))
+ async def parse(self, text: str):
+        """Parse HTML-styled text into tag-free text plus message entities.
+
+        Returns an OrderedDict with the keys "message" (the text without markup)
+        and "entities" (the list of entities found).
+
+        Raises:
+            ValueError: if a supported tag is left unclosed, or if the parser
+                meets a closing tag it has no open entity for.
+        """
+        # Falsy input is treated as an empty string; surrounding whitespace is dropped.
+        # NOTE(review): add_surrogates presumably makes offsets count UTF-16 code
+        # units - confirm against utils.
+ text = utils.add_surrogates(str(text or "").strip())
+
+ parser = Parser(self.client)
+ parser.feed(text)
+ parser.close()
+
+        # Anything still registered after close() is a tag that was never closed.
+ if parser.tag_entities:
+ unclosed_tags = []
+
+ for tag, entities in parser.tag_entities.items():
+ unclosed_tags.append("<{}> (x{})".format(tag, len(entities)))
+
+ raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags)))
+
entities = []
- offset = 0
- for match in self.HTML_RE.finditer(message):
- start = match.start() - offset
- style, url, body = match.group(1, 3, 4)
-
- if url:
- mention = self.MENTION_RE.match(url)
-
- if mention:
- user_id = int(mention.group(1))
-
- try:
- input_user = await self.client.resolve_peer(user_id)
- except PeerIdInvalid:
- input_user = None
-
- entity = (
- Mention(offset=start, length=len(body), user_id=input_user)
- if input_user else MentionInvalid(offset=start, length=len(body), user_id=user_id)
- )
- else:
- entity = Url(offset=start, length=len(body), url=url)
- else:
- if style == "b" or style == "strong":
- entity = Bold(offset=start, length=len(body))
- elif style == "i" or style == "em":
- entity = Italic(offset=start, length=len(body))
- elif style == "code":
- entity = Code(offset=start, length=len(body))
- elif style == "pre":
- entity = Pre(offset=start, length=len(body), language="")
- elif style == "u":
- entity = Underline(offset=start, length=len(body))
- elif style in ["strike", "s", "del"]:
- entity = Strike(offset=start, length=len(body))
- elif style == "blockquote":
- entity = Blockquote(offset=start, length=len(body))
- else:
+ for entity in parser.entities:
+            # Mentions carry a bare numeric id from the parser; swap it for the
+            # resolved peer. A mention whose peer cannot be resolved is dropped.
+ if isinstance(entity, types.InputMessageEntityMentionName):
+ try:
+ entity.user_id = await self.client.resolve_peer(entity.user_id)
+ except PeerIdInvalid:
continue
entities.append(entity)
- message = message.replace(match.group(), body)
- offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0)
- # TODO: OrderedDict to be removed in Python3.6
+ # TODO: OrderedDict to be removed in Python 3.6
return OrderedDict([
- ("message", utils.remove_surrogates(message)),
+ ("message", utils.remove_surrogates(parser.text)),
("entities", entities)
])
- def unparse(self, message: str, entities: list):
- message = utils.add_surrogates(message).strip()
- offset = 0
+    @staticmethod
+    def unparse(text: str, entities: list):
+        """Return ``text`` with every supported entity wrapped in its HTML tag.
+
+        Parameters:
+            text: Plain message text that the entity offsets refer to.
+            entities: Objects exposing ``offset``, ``length``, ``type``, ``url`` and ``user``.
+
+        Entities of an unsupported type and empty entities are skipped. Markup is
+        inserted at each entity's own offsets into the original text, so repeated
+        substrings and nested or adjacent entities are tagged at the right place.
+        """
+        text = utils.add_surrogates(text)
+        # One (start, end, opening markup, closing markup) record per rendered entity.
+        spans = []
for entity in entities:
- start = entity.offset + offset
+            start = entity.offset
+            end = start + entity.length
+
+            if entity.length <= 0:
+                # Nothing to wrap.
+                continue
+
type = entity.type
+
url = entity.url
user = entity.user
- sub = message[start: start + entity.length]
+
if type == "bold":
style = "b"
elif type == "italic":
style = "i"
- elif type == "code":
- style = "code"
- elif type == "pre":
- style = "pre"
elif type == "underline":
style = "u"
elif type == "strike":
style = "s"
+            elif type == "code":
+                style = "code"
+            elif type == "pre":
+                style = "pre"
elif type == "blockquote":
style = "blockquote"
elif type == "text_link":
- offset += 15 + len(url)
- message = message[:start] + message[start:].replace(
- sub, "{}".format(url, sub), 1)
+                spans.append((start, end, '<a href="{}">'.format(url), "</a>"))
continue
elif type == "text_mention":
- offset += 28 + len(str(user.id))
- message = message[:start] + message[start:].replace(
- sub, "{}".format(user.id, sub), 1)
+                spans.append((start, end, '<a href="tg://user?id={}">'.format(user.id), "</a>"))
continue
else:
continue
- offset += len(style) * 2 + 5
- message = message[:start] + message[start:].replace(
- sub, "<{0}>{1}{0}>".format(style, sub), 1)
+            spans.append((start, end, "<{}>".format(style), "</{}>".format(style)))
+
+        # Order the insertion points. At the same position closing tags (0) come
+        # before opening tags (1); inner entities close first and outer entities
+        # open first, so the resulting markup is properly nested.
+        marks = []
+        for index, (start, end, opening, closing) in enumerate(spans):
+            marks.append((end, 0, -start, -index, closing))
+            marks.append((start, 1, -end, index, opening))
+        marks.sort()
+
+        # Stitch the original text and the markup together in a single pass.
+        pieces = []
+        cursor = 0
+        for position, _, _, _, markup in marks:
+            pieces.append(text[cursor:position])
+            pieces.append(markup)
+            cursor = position
+        pieces.append(text[cursor:])
+        text = "".join(pieces)
- return utils.remove_surrogates(message)
+        return utils.remove_surrogates(text)
diff --git a/pyrogram/client/style/markdown.py b/pyrogram/client/style/markdown.py
index 6d9dc650..73bb3877 100644
--- a/pyrogram/client/style/markdown.py
+++ b/pyrogram/client/style/markdown.py
@@ -16,147 +16,142 @@
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
+import html
import re
-from collections import OrderedDict
import pyrogram
-from pyrogram.api.types import (
- MessageEntityBold as Bold,
- MessageEntityItalic as Italic,
- MessageEntityCode as Code,
- MessageEntityTextUrl as Url,
- MessageEntityPre as Pre,
- MessageEntityUnderline as Underline,
- MessageEntityStrike as Strike,
- MessageEntityMentionName as MentionInvalid,
- InputMessageEntityMentionName as Mention
-)
-from pyrogram.errors import PeerIdInvalid
from . import utils
+from .html import HTML
+
+# Markdown delimiters. Markdown.parse() maps each one to an HTML tag
+# (b, i, u, s, code, pre) and Markdown.unparse() wraps entities in them.
+BOLD_DELIM = "**"
+ITALIC_DELIM = "__"
+UNDERLINE_DELIM = "--"
+STRIKE_DELIM = "~~"
+CODE_DELIM = "`"
+PRE_DELIM = "```"
class Markdown:
- BOLD_DELIMITER = "**"
- ITALIC_DELIMITER = "__"
- UNDERLINE_DELIMITER = "--"
- STRIKE_DELIMITER = "~~"
- CODE_DELIMITER = "`"
- PRE_DELIMITER = "```"
-
- MARKDOWN_RE = re.compile(r"({d})([\w\W]*?)\1|\[([^[]+?)\]\(([^(]+?)\)".format(
+    # Matches a single delimiter occurrence (group 1). Every character of every
+    # delimiter is backslash-escaped, and PRE_DELIM is listed before CODE_DELIM
+    # so that "```" is tried before "`".
+ MARKDOWN_RE = re.compile(r"({d})".format(
d="|".join(
["".join(i) for i in [
- ["\{}".format(j) for j in i]
+ [r"\{}".format(j) for j in i]
for i in [
- PRE_DELIMITER,
- CODE_DELIMITER,
- STRIKE_DELIMITER,
- UNDERLINE_DELIMITER,
- ITALIC_DELIMITER,
- BOLD_DELIMITER
+ PRE_DELIM,
+ CODE_DELIM,
+ STRIKE_DELIM,
+ UNDERLINE_DELIM,
+ ITALIC_DELIM,
+ BOLD_DELIM
]
]]
- )
- ))
- MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
+ )))
- def __init__(self, client: "pyrogram.BaseClient" = None):
- self.client = client
+    # Inline link syntax: [body](url). Group 1 is the body, group 2 the url.
+    URL_RE = re.compile(r"\[([^[]+)]\(([^(]+)\)")
+
+    # HTML templates that parse() substitutes for the Markdown syntax.
+    OPENING_TAG = "<{}>"
+    CLOSING_TAG = "</{}>"
+    URL_MARKUP = '<a href="{}">{}</a>'  # formatted as (url, body)
+    # While one of these is open, parse() leaves every other delimiter untouched.
+    FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
+
+
+    def __init__(self, client: "pyrogram.BaseClient" = None):
+        """Create a Markdown parser that delegates entity parsing to ``HTML(client)``.
+
+        ``client`` stays optional, as it is for ``HTML`` and as it was before,
+        so ``Markdown()`` keeps working.
+        """
+        self.html = HTML(client)
+
+    async def parse(self, text: str):
+        """Translate Markdown delimiters and links to HTML, then parse that with ``HTML.parse``.
+
+        Falsy input is treated as an empty string and non-string input is
+        converted with ``str()``, mirroring ``HTML.parse``. The text is
+        HTML-escaped first, so markup typed by the user stays literal.
+
+        Returns whatever ``HTML.parse`` returns for the generated markup.
+        """
+        text = html.escape(str(text or ""))
- async def parse(self, message: str):
- message = utils.add_surrogates(str(message or "")).strip()
- entities = []
offset = 0
+        delims = set()  # delimiters that are currently open
- for match in self.MARKDOWN_RE.finditer(message):
- start = match.start() - offset
- style, body, text, url = match.groups()
+        # finditer() keeps scanning the string it was given, so match positions
+        # refer to the text before replacements; ``offset`` tracks the drift.
+        for match in Markdown.MARKDOWN_RE.finditer(text):
+            start, stop = match.span()
+            delim = match.group(1)
- if url:
- mention = self.MENTION_RE.match(url)
-
- if mention:
- user_id = int(mention.group(1))
-
- try:
- input_user = await self.client.resolve_peer(user_id)
- except PeerIdInvalid:
- input_user = None
-
- entity = (
- Mention(offset=start, length=len(text), user_id=input_user)
- if input_user else MentionInvalid(offset=start, length=len(text), user_id=user_id)
- )
- else:
- entity = Url(offset=start, length=len(text), url=url)
-
- body = text
- offset += len(url) + 4
+            if delim == BOLD_DELIM:
+                tag = "b"
+            elif delim == ITALIC_DELIM:
+                tag = "i"
+            elif delim == UNDERLINE_DELIM:
+                tag = "u"
+            elif delim == STRIKE_DELIM:
+                tag = "s"
+            elif delim == CODE_DELIM:
+                tag = "code"
+            elif delim == PRE_DELIM:
+                tag = "pre"
else:
- if style == self.BOLD_DELIMITER:
- entity = Bold(offset=start, length=len(body))
- elif style == self.ITALIC_DELIMITER:
- entity = Italic(offset=start, length=len(body))
- elif style == self.UNDERLINE_DELIMITER:
- entity = Underline(offset=start, length=len(body))
- elif style == self.STRIKE_DELIMITER:
- entity = Strike(offset=start, length=len(body))
- elif style == self.CODE_DELIMITER:
- entity = Code(offset=start, length=len(body))
- elif style == self.PRE_DELIMITER:
- entity = Pre(offset=start, length=len(body), language="")
- else:
- continue
+                continue
- offset += len(style) * 2
+            # Inside an open code/pre span the other delimiters are plain text.
+            if delim not in Markdown.FIXED_WIDTH_DELIMS and any(x in delims for x in Markdown.FIXED_WIDTH_DELIMS):
+                continue
- entities.append(entity)
- message = message.replace(match.group(), body)
+            # First occurrence opens the tag, the next one closes it.
+            if delim not in delims:
+                delims.add(delim)
+                tag = Markdown.OPENING_TAG.format(tag)
+            else:
+                delims.remove(delim)
+                tag = Markdown.CLOSING_TAG.format(tag)
- # TODO: OrderedDict to be removed in Python3.6
- return OrderedDict([
- ("message", utils.remove_surrogates(message)),
- ("entities", entities)
- ])
+            text = text[:start + offset] + tag + text[stop + offset:]
+
+            offset += len(tag) - len(delim)
- def unparse(self, message: str, entities: list):
- message = utils.add_surrogates(message).strip()
offset = 0
+        # Second pass: rewrite [body](url) links, with the same drift bookkeeping.
+        for match in re.finditer(Markdown.URL_RE, text):
+            start, stop = match.span()
+            full = match.group(0)
+
+            body, url = match.groups()
+            replace = Markdown.URL_MARKUP.format(url, body)
+
+            text = text[:start + offset] + replace + text[stop + offset:]
+
+            offset += len(replace) - len(full)
+
+        return await self.html.parse(text)
+
+    @staticmethod
+    def unparse(text: str, entities: list):
+        """Return ``text`` with every supported entity wrapped in its Markdown delimiters.
+
+        Parameters:
+            text: Plain message text that the entity offsets refer to.
+            entities: Objects exposing ``offset``, ``length``, ``type``, ``url`` and ``user``.
+
+        Entities of an unsupported type and empty entities are skipped. Markup is
+        inserted at each entity's own offsets into the original text, so repeated
+        substrings and nested or adjacent entities are marked at the right place.
+        """
+        text = utils.add_surrogates(text)
+        # One (start, end, opening markup, closing markup) record per rendered entity.
+        spans = []
+
for entity in entities:
- start = entity.offset + offset
+            start = entity.offset
+            end = start + entity.length
+
+            if entity.length <= 0:
+                # Nothing to wrap.
+                continue
+
type = entity.type
+
url = entity.url
user = entity.user
- sub = message[start: start + entity.length]
+
if type == "bold":
- style = self.BOLD_DELIMITER
+                style = BOLD_DELIM
elif type == "italic":
- style = self.ITALIC_DELIMITER
+                style = ITALIC_DELIM
elif type == "underline":
- style = self.UNDERLINE_DELIMITER
+                style = UNDERLINE_DELIM
elif type == "strike":
- style = self.STRIKE_DELIMITER
+                style = STRIKE_DELIM
elif type == "code":
- style = self.CODE_DELIMITER
+                style = CODE_DELIM
elif type == "pre":
- style = self.PRE_DELIMITER
+                style = PRE_DELIM
+            # TODO: Blockquote for MD
+            # elif type == "blockquote":
+            #     style = ...
elif type == "text_link":
- offset += 4 + len(url)
- message = message[:start] + message[start:].replace(
- sub, "[{}]({})".format(sub, url), 1)
+                spans.append((start, end, "[", "]({})".format(url)))
continue
elif type == "text_mention":
- offset += 17 + len(str(user.id))
- message = message[:start] + message[start:].replace(
- sub, "[{}](tg://user?id={})".format(sub, user.id), 1)
+                spans.append((start, end, "[", "](tg://user?id={})".format(user.id)))
continue
else:
continue
- offset += len(style) * 2
- message = message[:start] + message[start:].replace(
- sub, "{0}{1}{0}".format(style, sub), 1)
+            spans.append((start, end, style, style))
+
+        # Order the insertion points. At the same position closing markup (0) comes
+        # before opening markup (1); inner entities close first and outer entities
+        # open first.
+        marks = []
+        for index, (start, end, opening, closing) in enumerate(spans):
+            marks.append((end, 0, -start, -index, closing))
+            marks.append((start, 1, -end, index, opening))
+        marks.sort()
+
+        # Stitch the original text and the markup together in a single pass.
+        pieces = []
+        cursor = 0
+        for position, _, _, _, markup in marks:
+            pieces.append(text[cursor:position])
+            pieces.append(markup)
+            cursor = position
+        pieces.append(text[cursor:])
+        text = "".join(pieces)
- return utils.remove_surrogates(message)
+        return utils.remove_surrogates(text)
diff --git a/pyrogram/client/types/messages_and_media/message.py b/pyrogram/client/types/messages_and_media/message.py
index 5ea0b35d..47667b40 100644
--- a/pyrogram/client/types/messages_and_media/message.py
+++ b/pyrogram/client/types/messages_and_media/message.py
@@ -31,32 +31,30 @@ from ..object import Object
from ..update import Update
from ..user_and_chats.chat import Chat
from ..user_and_chats.user import User
+from ...style import utils, Markdown, HTML
class Str(str):
+    """``str`` subclass that carries the message entities belonging to its text."""
+
def __init__(self, *args):
super().__init__()
- self._client = None
- self._entities = None
+        # Set by init(); stays None until then.
+        self.entities = None
- def init(self, client, entities):
- self._client = client
- self._entities = entities
+    def init(self, entities):
+        """Attach ``entities`` to this string and return the string itself."""
+        self.entities = entities
return self
- @property
- def text(self):
- return self
-
@property
def markdown(self):
- return self._client.markdown.unparse(self, self._entities)
+        # Treat "init() never called" as "no entities" instead of iterating None.
+        return Markdown.unparse(self, self.entities or [])
@property
def html(self):
- return self._client.html.unparse(self, self._entities)
+        # Treat "init() never called" as "no entities" instead of iterating None.
+        return HTML.unparse(self, self.entities or [])
+
+    def __getitem__(self, item):
+        """Index or slice the surrogate-expanded text, then strip the surrogates again.
+
+        NOTE(review): presumably this lets entity offsets be used directly as
+        indices - confirm against ``utils.add_surrogates``.
+        """
+        return utils.remove_surrogates(utils.add_surrogates(self)[item])
class Message(Object, Update):
@@ -486,7 +484,7 @@ class Message(Object, Update):
if isinstance(message, types.Message):
entities = [MessageEntity._parse(client, entity, users) for entity in message.entities]
- entities = list(filter(lambda x: x is not None, entities))
+ entities = pyrogram.List(filter(lambda x: x is not None, entities))
forward_from = None
forward_sender_name = None
@@ -603,8 +601,8 @@ class Message(Object, Update):
date=message.date,
chat=Chat._parse(client, message, users, chats),
from_user=User._parse(client, users.get(message.from_id, None)),
- text=Str(message.message).init(client, entities) or None if media is None else None,
- caption=Str(message.message).init(client, entities) or None if media is not None else None,
+ text=Str(message.message).init(entities) or None if media is None else None,
+ caption=Str(message.message).init(entities) or None if media is not None else None,
entities=entities or None if media is None else None,
caption_entities=entities or None if media is not None else None,
author_signature=message.post_author,