mirror of
https://github.com/pyrogram/pyrogram
synced 2025-08-29 05:18:10 +00:00
Implement strict and loose markdown parsing
Markdown parsing now has two modes; `parse()` defaults to `strict=False`, i.e. loose mode:
- strict: only markdown syntax is parsed
- loose: both markdown and html syntax are parsed
This commit is contained in:
parent
e4a6d16cf3
commit
8d852cb47e
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
import html
|
import html
|
||||||
import re
|
import re
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
import pyrogram
|
import pyrogram
|
||||||
from . import utils
|
from . import utils
|
||||||
@ -30,9 +31,7 @@ STRIKE_DELIM = "~~"
|
|||||||
CODE_DELIM = "`"
|
CODE_DELIM = "`"
|
||||||
PRE_DELIM = "```"
|
PRE_DELIM = "```"
|
||||||
|
|
||||||
|
MARKDOWN_RE = re.compile(r"({d})|\[(.+?)\]\((.+?)\)".format(
|
||||||
class Markdown:
|
|
||||||
MARKDOWN_RE = re.compile(r"({d})".format(
|
|
||||||
d="|".join(
|
d="|".join(
|
||||||
["".join(i) for i in [
|
["".join(i) for i in [
|
||||||
[r"\{}".format(j) for j in i]
|
[r"\{}".format(j) for j in i]
|
||||||
@ -47,25 +46,37 @@ class Markdown:
|
|||||||
]]
|
]]
|
||||||
)))
|
)))
|
||||||
|
|
||||||
URL_RE = re.compile(r"\[([^[]+)]\(([^(]+)\)")
|
OPENING_TAG = "<{}>"
|
||||||
|
CLOSING_TAG = "</{}>"
|
||||||
|
URL_MARKUP = '<a href="{}">{}</a>'
|
||||||
|
FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
|
||||||
|
|
||||||
OPENING_TAG = "<{}>"
|
|
||||||
CLOSING_TAG = "</{}>"
|
|
||||||
URL_MARKUP = '<a href="{}">{}</a>'
|
|
||||||
FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]
|
|
||||||
|
|
||||||
def __init__(self, client: "pyrogram.BaseClient"):
|
class Markdown:
|
||||||
|
def __init__(self, client: Union["pyrogram.BaseClient", None]):
|
||||||
self.html = HTML(client)
|
self.html = HTML(client)
|
||||||
|
|
||||||
def parse(self, text: str):
|
def parse(self, text: str, strict: bool = False):
|
||||||
|
if strict:
|
||||||
text = html.escape(text)
|
text = html.escape(text)
|
||||||
|
|
||||||
offset = 0
|
|
||||||
delims = set()
|
delims = set()
|
||||||
|
is_fixed_width = False
|
||||||
|
|
||||||
for i, match in enumerate(re.finditer(Markdown.MARKDOWN_RE, text)):
|
for i, match in enumerate(re.finditer(MARKDOWN_RE, text)):
|
||||||
start, stop = match.span()
|
start, _ = match.span()
|
||||||
delim = match.group(1)
|
delim, text_url, url = match.groups()
|
||||||
|
full = match.group(0)
|
||||||
|
|
||||||
|
if delim in FIXED_WIDTH_DELIMS:
|
||||||
|
is_fixed_width = not is_fixed_width
|
||||||
|
|
||||||
|
if is_fixed_width and delim not in FIXED_WIDTH_DELIMS:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if text_url:
|
||||||
|
text = utils.replace_once(text, full, URL_MARKUP.format(url, text_url), start)
|
||||||
|
continue
|
||||||
|
|
||||||
if delim == BOLD_DELIM:
|
if delim == BOLD_DELIM:
|
||||||
tag = "b"
|
tag = "b"
|
||||||
@ -82,32 +93,14 @@ class Markdown:
|
|||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if delim not in Markdown.FIXED_WIDTH_DELIMS and any(x in delims for x in Markdown.FIXED_WIDTH_DELIMS):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if delim not in delims:
|
if delim not in delims:
|
||||||
delims.add(delim)
|
delims.add(delim)
|
||||||
tag = Markdown.OPENING_TAG.format(tag)
|
tag = OPENING_TAG.format(tag)
|
||||||
else:
|
else:
|
||||||
delims.remove(delim)
|
delims.remove(delim)
|
||||||
tag = Markdown.CLOSING_TAG.format(tag)
|
tag = CLOSING_TAG.format(tag)
|
||||||
|
|
||||||
text = text[:start + offset] + tag + text[stop + offset:]
|
text = utils.replace_once(text, delim, tag, start)
|
||||||
|
|
||||||
offset += len(tag) - len(delim)
|
|
||||||
|
|
||||||
offset = 0
|
|
||||||
|
|
||||||
for match in re.finditer(Markdown.URL_RE, text):
|
|
||||||
start, stop = match.span()
|
|
||||||
full = match.group(0)
|
|
||||||
|
|
||||||
body, url = match.groups()
|
|
||||||
replace = Markdown.URL_MARKUP.format(url, body)
|
|
||||||
|
|
||||||
text = text[:start + offset] + replace + text[stop + offset:]
|
|
||||||
|
|
||||||
offset += len(replace) - len(full)
|
|
||||||
|
|
||||||
return self.html.parse(text)
|
return self.html.parse(text)
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user