From a6f26c16fc4f59bfa07f2445ac06dd24e43690cf Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Wed, 7 Mar 2018 14:03:38 +0200 Subject: [PATCH] Add strikethrough/underline <-> unicode converter to formatter --- mautrix_telegram/formatter/from_matrix.py | 13 +++++++-- mautrix_telegram/formatter/from_telegram.py | 5 +++- mautrix_telegram/formatter/util.py | 31 +++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/mautrix_telegram/formatter/from_matrix.py b/mautrix_telegram/formatter/from_matrix.py index be9b88f8..842ee3ba 100644 --- a/mautrix_telegram/formatter/from_matrix.py +++ b/mautrix_telegram/formatter/from_matrix.py @@ -25,7 +25,8 @@ from telethon_aio.tl.types import * from .. import user as u, puppet as pu, portal as po from ..db import Message as DBMessage -from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text) +from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, + trim_reply_fallback_text, html_to_unicode) log = logging.getLogger("mau.fmt.mx") @@ -35,7 +36,7 @@ class MatrixParser(HTMLParser): room_regex = re.compile("https://matrix.to/#/(#.+:.+)") block_tags = ("br", "p", "pre", "blockquote", "ol", "ul", "li", - "h1", "h2", "h3", "h4", "h5", "h6" + "h1", "h2", "h3", "h4", "h5", "h6", "div", "hr", "table") def __init__(self): @@ -159,6 +160,14 @@ class MatrixParser(HTMLParser): text = url elif previous_tag == "command": text = f"/{text}" + + # Strikethrough + if "del" in self._open_tags: + text = html_to_unicode(text, "\u0336") + # Underline + if "u" in self._open_tags: + text = html_to_unicode(text, "\u0332") + list_entry_handled_once = False # In order to maintain order of things like blockquotes in lists or lists in blockquotes, # we can't just have ifs/elses and we need to actually loop through the open tags in order. diff --git a/mautrix_telegram/formatter/from_telegram.py b/mautrix_telegram/formatter/from_telegram.py index 276f68c3..7ed974e4 100644 --- a/mautrix_telegram/formatter/from_telegram.py +++ b/mautrix_telegram/formatter/from_telegram.py @@ -23,7 +23,7 @@ from mautrix_appservice import MatrixRequestError from .. import user as u, puppet as pu, portal as po from ..db import Message as DBMessage from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, - trim_reply_fallback_text) + trim_reply_fallback_text, unicode_to_html) log = logging.getLogger("mau.fmt.tg") @@ -138,6 +138,9 @@ async def telegram_to_matrix(evt, source, main_intent=None, is_edit=False): text += f"\n- {evt.post_author}" html += f"
- {evt.post_author}" + html = unicode_to_html(text, html, "\u0336", "del") + html = unicode_to_html(text, html, "\u0332", "u") + if html: html = html.replace("\n", "
") diff --git a/mautrix_telegram/formatter/util.py b/mautrix_telegram/formatter/util.py index ec51844c..86c263fd 100644 --- a/mautrix_telegram/formatter/util.py +++ b/mautrix_telegram/formatter/util.py @@ -1,3 +1,4 @@ +from html import escape import struct import re @@ -31,3 +32,33 @@ HTML_REPLY_FALLBACK_REGEX = re.compile(r"^
[\s\S]+?" + tag_end = f"" + characters = html.split(ctrl) + html = "" + in_del = False + for char in characters: + if not in_del: + if len(char) > 1: + html += char[0:-1] + char = char[-1] + html += tag_start + in_del = True + html += char + else: + if len(char) > 1: + html += tag_end + in_del = False + html += char + return html + + +def html_to_unicode(text, ctrl): + return ctrl.join(text) + ctrl