Add strikethrough/underline <-> unicode converter to formatter
This commit is contained in:
@@ -25,7 +25,8 @@ from telethon_aio.tl.types import *
|
||||
|
||||
from .. import user as u, puppet as pu, portal as po
|
||||
from ..db import Message as DBMessage
|
||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text)
|
||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
||||
trim_reply_fallback_text, html_to_unicode)
|
||||
|
||||
log = logging.getLogger("mau.fmt.mx")
|
||||
|
||||
@@ -35,7 +36,7 @@ class MatrixParser(HTMLParser):
|
||||
room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
|
||||
block_tags = ("br", "p", "pre", "blockquote",
|
||||
"ol", "ul", "li",
|
||||
"h1", "h2", "h3", "h4", "h5", "h6"
|
||||
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"div", "hr", "table")
|
||||
|
||||
def __init__(self):
|
||||
@@ -159,6 +160,14 @@ class MatrixParser(HTMLParser):
|
||||
text = url
|
||||
elif previous_tag == "command":
|
||||
text = f"/{text}"
|
||||
|
||||
# Strikethrough
|
||||
if "del" in self._open_tags:
|
||||
text = html_to_unicode(text, "\u0336")
|
||||
# Underline
|
||||
if "u" in self._open_tags:
|
||||
text = html_to_unicode(text, "\u0332")
|
||||
|
||||
list_entry_handled_once = False
|
||||
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
|
||||
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
|
||||
|
||||
@@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError
|
||||
from .. import user as u, puppet as pu, portal as po
|
||||
from ..db import Message as DBMessage
|
||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
||||
trim_reply_fallback_text)
|
||||
trim_reply_fallback_text, unicode_to_html)
|
||||
|
||||
log = logging.getLogger("mau.fmt.tg")
|
||||
|
||||
@@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False):
|
||||
text += f"\n- {evt.post_author}"
|
||||
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
|
||||
|
||||
html = unicode_to_html(text, html, "\u0336", "del")
|
||||
html = unicode_to_html(text, html, "\u0332", "u")
|
||||
|
||||
if html:
|
||||
html = html.replace("\n", "<br/>")
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from html import escape
|
||||
import struct
|
||||
import re
|
||||
|
||||
@@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<blockquote data-mx-reply>[\s\S]+?</bl
|
||||
|
||||
def trim_reply_fallback_html(html):
|
||||
return HTML_REPLY_FALLBACK_REGEX.sub("", html)
|
||||
|
||||
|
||||
def unicode_to_html(text, html, ctrl, tag):
    """Turn runs of characters marked with a combining character into an HTML tag.

    A character counts as "marked" when it is immediately followed by ``ctrl``
    (for example U+0336 for strikethrough or U+0332 for underline). Every
    maximal run of consecutive marked characters is wrapped in a single
    ``<tag>...</tag>`` pair and the ``ctrl`` characters are removed.

    Args:
        text: The plain-text form of the message. Used to decide whether any
            conversion is needed and as the source for ``html`` when no HTML
            has been generated yet.
        html: The HTML form of the message so far, or a falsy value if there
            is none.
        ctrl: The combining character that marks the preceding character.
        tag: The HTML tag name to wrap marked runs in (without brackets).

    Returns:
        ``html`` unchanged if ``text`` contains no marker at all; otherwise the
        HTML (generated from ``text`` via ``escape`` when ``html`` was falsy)
        with marked runs wrapped in ``tag`` and all ``ctrl`` characters removed.
    """
    # Fast path: nothing to convert, so don't generate HTML for a plain message.
    if not any(marker in text for marker in (ctrl, "\u0336", "\u0332")):
        return html
    if not html:
        html = escape(text)
    # The text may only contain the *other* marker; leave the HTML untouched
    # instead of wrapping an arbitrary character in an unclosed tag.
    if ctrl not in html:
        return html

    mark = re.escape(ctrl)
    # One marked unit is either a whole HTML entity (so an escaped "<" is not
    # cut in half) or a single character that is neither the marker itself nor
    # part of tag syntax, followed by the marker.
    marked_run = re.compile(f"(?:(?:&#?[0-9A-Za-z]+;|(?!{mark})[^<>]){mark})+")
    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    def wrap(match):
        return tag_start + match.group(0).replace(ctrl, "") + tag_end

    # Any marker that did not follow a markable character (e.g. a leading or
    # doubled one) is dropped rather than leaked into the output.
    return marked_run.sub(wrap, html).replace(ctrl, "")
|
||||
|
||||
|
||||
def html_to_unicode(text, ctrl):
    """Mark every character of ``text`` by appending ``ctrl`` after it.

    Args:
        text: The string whose characters should be marked.
        ctrl: The combining character to insert after each character
            (e.g. U+0336 for strikethrough, U+0332 for underline).

    Returns:
        ``text`` with ``ctrl`` following each of its characters. An empty
        ``text`` yields an empty string, so no stray combining mark can attach
        itself to whatever precedes it in the final message.
    """
    return "".join(char + ctrl for char in text)
|
||||
|
||||
Reference in New Issue
Block a user