Add strikethrough/underline <-> unicode converter to formatter

This commit is contained in:
Tulir Asokan
2018-03-07 14:03:38 +02:00
parent 13dddb4c10
commit a6f26c16fc
3 changed files with 46 additions and 3 deletions
+11 -2
View File
@@ -25,7 +25,8 @@ from telethon_aio.tl.types import *
from .. import user as u, puppet as pu, portal as po
from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text)
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text, html_to_unicode)
log = logging.getLogger("mau.fmt.mx")
@@ -35,7 +36,7 @@ class MatrixParser(HTMLParser):
room_regex = re.compile("https://matrix.to/#/(#.+:.+)")
block_tags = ("br", "p", "pre", "blockquote",
"ol", "ul", "li",
"h1", "h2", "h3", "h4", "h5", "h6"
"h1", "h2", "h3", "h4", "h5", "h6",
"div", "hr", "table")
def __init__(self):
@@ -159,6 +160,14 @@ class MatrixParser(HTMLParser):
text = url
elif previous_tag == "command":
text = f"/{text}"
# Strikethrough
if "del" in self._open_tags:
text = html_to_unicode(text, "\u0336")
# Underline
if "u" in self._open_tags:
text = html_to_unicode(text, "\u0332")
list_entry_handled_once = False
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
+4 -1
View File
@@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError
from .. import user as u, puppet as pu, portal as po
from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text)
trim_reply_fallback_text, unicode_to_html)
log = logging.getLogger("mau.fmt.tg")
@@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False):
text += f"\n- {evt.post_author}"
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
html = unicode_to_html(text, html, "\u0336", "del")
html = unicode_to_html(text, html, "\u0332", "u")
if html:
html = html.replace("\n", "<br/>")
+31
View File
@@ -1,3 +1,4 @@
from html import escape
import struct
import re
@@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<blockquote data-mx-reply>[\s\S]+?</bl
def trim_reply_fallback_html(html):
    """Return *html* with every match of ``HTML_REPLY_FALLBACK_REGEX`` removed.

    The pattern is anchored at the start of the string and begins with
    ``<blockquote data-mx-reply>``, so in practice this drops a leading
    reply-fallback section and leaves the rest of the body untouched.
    """
    without_fallback = HTML_REPLY_FALLBACK_REGEX.sub("", html)
    return without_fallback
def _split_marked_char(segment):
    """Split *segment* into ``(prefix, marked)``, *marked* being its final character.

    A trailing HTML character reference (``&amp;``, ``&#x27;``, ...) is treated
    as a single character so a tag is never opened in the middle of an entity.
    """
    if segment.endswith(";"):
        amp = segment.rfind("&")
        if amp != -1 and segment[amp + 1:-1].replace("#", "", 1).isalnum():
            return segment[:amp], segment[amp:]
    return segment[:-1], segment[-1:]


def unicode_to_html(text, html, ctrl, tag):
    """Convert runs of characters marked with a combining character into an HTML tag.

    Every character that is immediately followed by *ctrl* is considered
    marked. Consecutive marked characters are wrapped in a single
    ``<tag>...</tag>`` pair and the *ctrl* characters themselves are removed.

    Args:
        text: The plain-text message; only used to decide whether any
            conversion is needed and as the source when *html* is empty.
        html: The HTML built so far. If empty/``None`` and *text* contains
            *ctrl*, the escaped *text* is used as the starting point.
        ctrl: The combining character that marks a character
            (e.g. ``"\\u0336"`` for strikethrough, ``"\\u0332"`` for underline).
        tag: The HTML tag name to emit for marked runs (e.g. ``"del"``, ``"u"``).

    Returns:
        *html* unchanged when *text* does not contain *ctrl*; otherwise the
        HTML with marked runs wrapped in *tag*.
    """
    # Only the control character we were asked to convert matters here; the
    # presence of some other combining character must not trigger a rewrite.
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)

    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # Every piece except the last one was followed by ctrl in the input, so the
    # last character of each such piece is a marked character. The final piece
    # is trailing, unmarked content.
    *segments, tail = html.split(ctrl)

    parts = []
    in_tag = False
    for segment in segments:
        if not segment:
            # Two ctrl characters in a row: nothing new to mark.
            continue
        prefix, marked = _split_marked_char(segment)
        if prefix:
            # Unmarked content sits between the previous marked character and
            # this one, so the current run (if any) ends before it.
            if in_tag:
                parts.append(tag_end)
                in_tag = False
            parts.append(prefix)
        if not in_tag:
            parts.append(tag_start)
            in_tag = True
        parts.append(marked)

    # Always close a run that reaches the end of the marked content.
    if in_tag:
        parts.append(tag_end)
    parts.append(tail)
    return "".join(parts)
def html_to_unicode(text, ctrl):
    """Mark every character of *text* by appending the combining character *ctrl*.

    Args:
        text: The text to mark.
        ctrl: The combining character to place after each character
            (e.g. ``"\\u0336"`` for strikethrough, ``"\\u0332"`` for underline).

    Returns:
        *text* with *ctrl* inserted after each character. Empty input yields
        an empty string, so no stray combining character is emitted that
        would attach itself to whatever precedes it in the output.
    """
    return "".join(f"{char}{ctrl}" for char in text)