diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index f34f615a..3437aeae 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,2 @@ -github: delivrance +# github: delivrance custom: https://docs.pyrogram.org/support-pyrogram diff --git a/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv b/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv index 446fe908..4bbea8ea 100644 --- a/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv +++ b/compiler/error/source/500_INTERNAL_SERVER_ERROR.tsv @@ -9,4 +9,5 @@ RANDOM_ID_DUPLICATE Telegram is having internal problems. Please try again later WORKER_BUSY_TOO_LONG_RETRY Telegram is having internal problems. Please try again later INTERDC_X_CALL_ERROR Telegram is having internal problems at DC{x}. Please try again later INTERDC_X_CALL_RICH_ERROR Telegram is having internal problems at DC{x}. Please try again later -FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later \ No newline at end of file +FOLDER_DEAC_AUTOFIX_ALL Telegram is having internal problems. Please try again later +MSGID_DECREASE_RETRY Telegram is having internal problems. Please try again later \ No newline at end of file diff --git a/pyrogram/client/style/html.py b/pyrogram/client/style/html.py index 4d3e3be2..31d7f797 100644 --- a/pyrogram/client/style/html.py +++ b/pyrogram/client/style/html.py @@ -16,127 +16,168 @@ # You should have received a copy of the GNU Lesser General Public License # along with Pyrogram. If not, see . +import html import re from collections import OrderedDict +from html.parser import HTMLParser import pyrogram -from pyrogram.api.types import ( - MessageEntityBold as Bold, - MessageEntityItalic as Italic, - MessageEntityCode as Code, - MessageEntityTextUrl as Url, - MessageEntityPre as Pre, - MessageEntityUnderline as Underline, - MessageEntityStrike as Strike, - MessageEntityBlockquote as Blockquote, - MessageEntityMentionName as MentionInvalid, - InputMessageEntityMentionName as Mention, -) +from pyrogram.api import types from pyrogram.errors import PeerIdInvalid from . import utils -class HTML: - HTML_RE = re.compile(r"<(\w+)(?: href=([\"'])([^<]+)\2)?>([^>]+)") +class Parser(HTMLParser): MENTION_RE = re.compile(r"tg://user\?id=(\d+)") + def __init__(self, client: "pyrogram.BaseClient"): + super().__init__() + + self.client = client + + self.text = "" + self.entities = [] + self.tag_entities = {} + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + extra = {} + + if tag in ["b", "strong"]: + entity = types.MessageEntityBold + elif tag in ["i", "em"]: + entity = types.MessageEntityItalic + elif tag == "u": + entity = types.MessageEntityUnderline + elif tag in ["s", "del", "strike"]: + entity = types.MessageEntityStrike + elif tag == "blockquote": + entity = types.MessageEntityBlockquote + elif tag == "code": + entity = types.MessageEntityCode + elif tag == "pre": + entity = types.MessageEntityPre + extra["language"] = "" + elif tag == "a": + url = attrs.get("href", "") + + mention = Parser.MENTION_RE.match(url) + + if mention: + entity = types.InputMessageEntityMentionName + extra["user_id"] = int(mention.group(1)) + else: + entity = types.MessageEntityTextUrl + extra["url"] = url + else: + return + + if tag not in self.tag_entities: + self.tag_entities[tag] = [] + + self.tag_entities[tag].append(entity(offset=len(self.text), length=0, **extra)) + + def handle_data(self, data): + data = html.unescape(data) + + for entities in self.tag_entities.values(): + for entity in entities: + entity.length += len(data) + + self.text += data + + def handle_endtag(self, tag): + try: + self.entities.append(self.tag_entities[tag].pop()) + except (KeyError, IndexError): + line, offset = self.getpos() + offset += 1 + + raise ValueError("Unmatched closing tag at line {}:{}".format(tag, line, offset)) + else: + if not self.tag_entities[tag]: + self.tag_entities.pop(tag) + + def error(self, message): + pass + + +class HTML: def __init__(self, client: "pyrogram.BaseClient" = None): self.client = client - async def parse(self, message: str): - message = utils.add_surrogates(str(message or "")) + async def parse(self, text: str): + text = utils.add_surrogates(str(text or "").strip()) + + parser = Parser(self.client) + parser.feed(text) + parser.close() + + if parser.tag_entities: + unclosed_tags = [] + + for tag, entities in parser.tag_entities.items(): + unclosed_tags.append("<{}> (x{})".format(tag, len(entities))) + + raise ValueError("Unclosed tags: {}".format(", ".join(unclosed_tags))) + entities = [] - offset = 0 - for match in self.HTML_RE.finditer(message): - start = match.start() - offset - style, url, body = match.group(1, 3, 4) - - if url: - mention = self.MENTION_RE.match(url) - - if mention: - user_id = int(mention.group(1)) - - try: - input_user = await self.client.resolve_peer(user_id) - except PeerIdInvalid: - input_user = None - - entity = ( - Mention(offset=start, length=len(body), user_id=input_user) - if input_user else MentionInvalid(offset=start, length=len(body), user_id=user_id) - ) - else: - entity = Url(offset=start, length=len(body), url=url) - else: - if style == "b" or style == "strong": - entity = Bold(offset=start, length=len(body)) - elif style == "i" or style == "em": - entity = Italic(offset=start, length=len(body)) - elif style == "code": - entity = Code(offset=start, length=len(body)) - elif style == "pre": - entity = Pre(offset=start, length=len(body), language="") - elif style == "u": - entity = Underline(offset=start, length=len(body)) - elif style in ["strike", "s", "del"]: - entity = Strike(offset=start, length=len(body)) - elif style == "blockquote": - entity = Blockquote(offset=start, length=len(body)) - else: + for entity in parser.entities: + if isinstance(entity, types.InputMessageEntityMentionName): + try: + entity.user_id = await self.client.resolve_peer(entity.user_id) + except PeerIdInvalid: continue entities.append(entity) - message = message.replace(match.group(), body) - offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0) - # TODO: OrderedDict to be removed in Python3.6 + # TODO: OrderedDict to be removed in Python 3.6 return OrderedDict([ - ("message", utils.remove_surrogates(message)), + ("message", utils.remove_surrogates(parser.text)), ("entities", entities) ]) - def unparse(self, message: str, entities: list): - message = utils.add_surrogates(message).strip() - offset = 0 + @staticmethod + def unparse(text: str, entities: list): + text = utils.add_surrogates(text) + copy = text for entity in entities: - start = entity.offset + offset + start = entity.offset + end = start + entity.length + type = entity.type + url = entity.url user = entity.user - sub = message[start: start + entity.length] + + sub = copy[start:end] if type == "bold": style = "b" elif type == "italic": style = "i" - elif type == "code": - style = "code" - elif type == "pre": - style = "pre" elif type == "underline": style = "u" elif type == "strike": style = "s" + elif type == "code": + style = "code" + elif type == "pre": + style = "pre" elif type == "blockquote": style = "blockquote" elif type == "text_link": - offset += 15 + len(url) - message = message[:start] + message[start:].replace( - sub, "{}".format(url, sub), 1) + text = text[:start] + text[start:].replace(sub, '{}'.format(url, sub), 1) continue elif type == "text_mention": - offset += 28 + len(str(user.id)) - message = message[:start] + message[start:].replace( - sub, "{}".format(user.id, sub), 1) + text = text[:start] + text[start:].replace( + sub, '{}'.format(user.id, sub), 1) continue else: continue - offset += len(style) * 2 + 5 - message = message[:start] + message[start:].replace( - sub, "<{0}>{1}".format(style, sub), 1) + text = text[:start] + text[start:].replace(sub, "<{0}>{1}".format(style, sub), 1) - return utils.remove_surrogates(message) + return utils.remove_surrogates(text) diff --git a/pyrogram/client/style/markdown.py b/pyrogram/client/style/markdown.py index 6d9dc650..73bb3877 100644 --- a/pyrogram/client/style/markdown.py +++ b/pyrogram/client/style/markdown.py @@ -16,147 +16,142 @@ # You should have received a copy of the GNU Lesser General Public License # along with Pyrogram. If not, see . +import html import re -from collections import OrderedDict import pyrogram -from pyrogram.api.types import ( - MessageEntityBold as Bold, - MessageEntityItalic as Italic, - MessageEntityCode as Code, - MessageEntityTextUrl as Url, - MessageEntityPre as Pre, - MessageEntityUnderline as Underline, - MessageEntityStrike as Strike, - MessageEntityMentionName as MentionInvalid, - InputMessageEntityMentionName as Mention -) -from pyrogram.errors import PeerIdInvalid from . import utils +from .html import HTML + +BOLD_DELIM = "**" +ITALIC_DELIM = "__" +UNDERLINE_DELIM = "--" +STRIKE_DELIM = "~~" +CODE_DELIM = "`" +PRE_DELIM = "```" class Markdown: - BOLD_DELIMITER = "**" - ITALIC_DELIMITER = "__" - UNDERLINE_DELIMITER = "--" - STRIKE_DELIMITER = "~~" - CODE_DELIMITER = "`" - PRE_DELIMITER = "```" - - MARKDOWN_RE = re.compile(r"({d})([\w\W]*?)\1|\[([^[]+?)\]\(([^(]+?)\)".format( + MARKDOWN_RE = re.compile(r"({d})".format( d="|".join( ["".join(i) for i in [ - ["\{}".format(j) for j in i] + [r"\{}".format(j) for j in i] for i in [ - PRE_DELIMITER, - CODE_DELIMITER, - STRIKE_DELIMITER, - UNDERLINE_DELIMITER, - ITALIC_DELIMITER, - BOLD_DELIMITER + PRE_DELIM, + CODE_DELIM, + STRIKE_DELIM, + UNDERLINE_DELIM, + ITALIC_DELIM, + BOLD_DELIM ] ]] - ) - )) - MENTION_RE = re.compile(r"tg://user\?id=(\d+)") + ))) - def __init__(self, client: "pyrogram.BaseClient" = None): - self.client = client + URL_RE = re.compile(r"\[([^[]+)]\(([^(]+)\)") + + OPENING_TAG = "<{}>" + CLOSING_TAG = "" + URL_MARKUP = '{}' + FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM] + + def __init__(self, client: "pyrogram.BaseClient"): + self.html = HTML(client) + + async def parse(self, text: str): + text = html.escape(text) - async def parse(self, message: str): - message = utils.add_surrogates(str(message or "")).strip() - entities = [] offset = 0 + delims = set() - for match in self.MARKDOWN_RE.finditer(message): - start = match.start() - offset - style, body, text, url = match.groups() + for i, match in enumerate(re.finditer(Markdown.MARKDOWN_RE, text)): + start, stop = match.span() + delim = match.group(1) - if url: - mention = self.MENTION_RE.match(url) - - if mention: - user_id = int(mention.group(1)) - - try: - input_user = await self.client.resolve_peer(user_id) - except PeerIdInvalid: - input_user = None - - entity = ( - Mention(offset=start, length=len(text), user_id=input_user) - if input_user else MentionInvalid(offset=start, length=len(text), user_id=user_id) - ) - else: - entity = Url(offset=start, length=len(text), url=url) - - body = text - offset += len(url) + 4 + if delim == BOLD_DELIM: + tag = "b" + elif delim == ITALIC_DELIM: + tag = "i" + elif delim == UNDERLINE_DELIM: + tag = "u" + elif delim == STRIKE_DELIM: + tag = "s" + elif delim == CODE_DELIM: + tag = "code" + elif delim == PRE_DELIM: + tag = "pre" else: - if style == self.BOLD_DELIMITER: - entity = Bold(offset=start, length=len(body)) - elif style == self.ITALIC_DELIMITER: - entity = Italic(offset=start, length=len(body)) - elif style == self.UNDERLINE_DELIMITER: - entity = Underline(offset=start, length=len(body)) - elif style == self.STRIKE_DELIMITER: - entity = Strike(offset=start, length=len(body)) - elif style == self.CODE_DELIMITER: - entity = Code(offset=start, length=len(body)) - elif style == self.PRE_DELIMITER: - entity = Pre(offset=start, length=len(body), language="") - else: - continue + continue - offset += len(style) * 2 + if delim not in Markdown.FIXED_WIDTH_DELIMS and any(x in delims for x in Markdown.FIXED_WIDTH_DELIMS): + continue - entities.append(entity) - message = message.replace(match.group(), body) + if delim not in delims: + delims.add(delim) + tag = Markdown.OPENING_TAG.format(tag) + else: + delims.remove(delim) + tag = Markdown.CLOSING_TAG.format(tag) - # TODO: OrderedDict to be removed in Python3.6 - return OrderedDict([ - ("message", utils.remove_surrogates(message)), - ("entities", entities) - ]) + text = text[:start + offset] + tag + text[stop + offset:] + + offset += len(tag) - len(delim) - def unparse(self, message: str, entities: list): - message = utils.add_surrogates(message).strip() offset = 0 + for match in re.finditer(Markdown.URL_RE, text): + start, stop = match.span() + full = match.group(0) + + body, url = match.groups() + replace = Markdown.URL_MARKUP.format(url, body) + + text = text[:start + offset] + replace + text[stop + offset:] + + offset += len(replace) - len(full) + + return await self.html.parse(text) + + @staticmethod + def unparse(text: str, entities: list): + text = utils.add_surrogates(text) + copy = text + for entity in entities: - start = entity.offset + offset + start = entity.offset + end = start + entity.length + type = entity.type + url = entity.url user = entity.user - sub = message[start: start + entity.length] + + sub = copy[start:end] if type == "bold": - style = self.BOLD_DELIMITER + style = BOLD_DELIM elif type == "italic": - style = self.ITALIC_DELIMITER + style = ITALIC_DELIM elif type == "underline": - style = self.UNDERLINE_DELIMITER + style = UNDERLINE_DELIM elif type == "strike": - style = self.STRIKE_DELIMITER + style = STRIKE_DELIM elif type == "code": - style = self.CODE_DELIMITER + style = CODE_DELIM elif type == "pre": - style = self.PRE_DELIMITER + style = PRE_DELIM + # TODO: Blockquote for MD + # elif type == "blockquote": + # style = ... elif type == "text_link": - offset += 4 + len(url) - message = message[:start] + message[start:].replace( - sub, "[{}]({})".format(sub, url), 1) + text = text[:start] + text[start:].replace(sub, '[{1}]({0})'.format(url, sub), 1) continue elif type == "text_mention": - offset += 17 + len(str(user.id)) - message = message[:start] + message[start:].replace( - sub, "[{}](tg://user?id={})".format(sub, user.id), 1) + text = text[:start] + text[start:].replace( + sub, '[{1}](tg://user?id={0})'.format(user.id, sub), 1) continue else: continue - offset += len(style) * 2 - message = message[:start] + message[start:].replace( - sub, "{0}{1}{0}".format(style, sub), 1) + text = text[:start] + text[start:].replace(sub, "{0}{1}{0}".format(style, sub), 1) - return utils.remove_surrogates(message) + return utils.remove_surrogates(text) diff --git a/pyrogram/client/types/messages_and_media/message.py b/pyrogram/client/types/messages_and_media/message.py index 5ea0b35d..47667b40 100644 --- a/pyrogram/client/types/messages_and_media/message.py +++ b/pyrogram/client/types/messages_and_media/message.py @@ -31,32 +31,30 @@ from ..object import Object from ..update import Update from ..user_and_chats.chat import Chat from ..user_and_chats.user import User +from ...style import utils, Markdown, HTML class Str(str): def __init__(self, *args): super().__init__() - self._client = None - self._entities = None + self.entities = None - def init(self, client, entities): - self._client = client - self._entities = entities + def init(self, entities): + self.entities = entities return self - @property - def text(self): - return self - @property def markdown(self): - return self._client.markdown.unparse(self, self._entities) + return Markdown.unparse(self, self.entities) @property def html(self): - return self._client.html.unparse(self, self._entities) + return HTML.unparse(self, self.entities) + + def __getitem__(self, item): + return utils.remove_surrogates(utils.add_surrogates(self)[item]) class Message(Object, Update): @@ -486,7 +484,7 @@ class Message(Object, Update): if isinstance(message, types.Message): entities = [MessageEntity._parse(client, entity, users) for entity in message.entities] - entities = list(filter(lambda x: x is not None, entities)) + entities = pyrogram.List(filter(lambda x: x is not None, entities)) forward_from = None forward_sender_name = None @@ -603,8 +601,8 @@ class Message(Object, Update): date=message.date, chat=Chat._parse(client, message, users, chats), from_user=User._parse(client, users.get(message.from_id, None)), - text=Str(message.message).init(client, entities) or None if media is None else None, - caption=Str(message.message).init(client, entities) or None if media is not None else None, + text=Str(message.message).init(entities) or None if media is None else None, + caption=Str(message.message).init(entities) or None if media is not None else None, entities=entities or None if media is None else None, caption_entities=entities or None if media is not None else None, author_signature=message.post_author,