From 2d63c5b3cebbf7effc4ad49870f2bd0d1ffcd57d Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sat, 10 Mar 2018 09:39:53 +0200 Subject: [PATCH] Fix and refactor Matrix->Telegram formatter --- mautrix_telegram/formatter/from_matrix.py | 115 +++++++++++--------- mautrix_telegram/formatter/from_telegram.py | 2 +- mautrix_telegram/portal.py | 2 +- 3 files changed, 68 insertions(+), 51 deletions(-) diff --git a/mautrix_telegram/formatter/from_matrix.py b/mautrix_telegram/formatter/from_matrix.py index a6a976bb..ffbc6878 100644 --- a/mautrix_telegram/formatter/from_matrix.py +++ b/mautrix_telegram/formatter/from_matrix.py @@ -151,17 +151,22 @@ class MatrixParser(HTMLParser): for entity in self._building_entities.values(): entity.length += 1 - def handle_data(self, text): - text = unescape(text) + def _handle_special_previous_tags(self, text): + if "pre" not in self._open_tags and "code" not in self._open_tags: + text = text.replace("\n", "") + else: + text = text.strip() + previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else "" - extra_offset = 0 if previous_tag == "a": url = self._open_tags_meta[0] if url: text = url elif previous_tag == "command": text = f"/{text}" + return text + def _html_to_unicode(self, text): strikethrough, underline = "del" in self._open_tags, "u" in self._open_tags if strikethrough and underline: text = html_to_unicode(text, "\u0336\u0332") @@ -169,7 +174,10 @@ class MatrixParser(HTMLParser): text = html_to_unicode(text, "\u0336") elif underline: text = html_to_unicode(text, "\u0332") + return text + def _handle_tags_for_data(self, text): + extra_offset = 0 list_entry_handled_once = False # In order to maintain order of things like blockquotes in lists or lists in blockquotes, # we can't just have ifs/elses and we need to actually loop through the open tags in order. @@ -197,10 +205,19 @@ class MatrixParser(HTMLParser): text = indent + prefix + text self._list_entry_is_new = False list_entry_handled_once = True + return text, extra_offset + + def _extend_entities_in_construction(self, text, extra_offset): for tag, entity in self._building_entities.items(): entity.length += len(text) - extra_offset entity.offset += extra_offset + def handle_data(self, text): + text = unescape(text) + text = self._handle_special_previous_tags(text) + text = self._html_to_unicode(text) + text, extra_offset = self._handle_tags_for_data(text) + self._extend_entities_in_construction(text, extra_offset) self._line_is_new = False self.text += text @@ -223,6 +240,52 @@ command_regex = re.compile("(\s|^)!([A-Za-z0-9@]+)") plain_mention_regex = None +def plain_mention_to_html(match): + puppet = pu.Puppet.find_by_displayname(match.group(2)) + if puppet: + return (f"{match.group(1)}" + f"" + f"{puppet.displayname}" + "") + return "".join(match.groups()) + + +def matrix_to_telegram(html): + try: + parser = MatrixParser() + html = command_regex.sub(r"\1\2", html) + if should_bridge_plaintext_highlights: + html = plain_mention_regex.sub(plain_mention_to_html, html) + parser.feed(add_surrogates(html)) + print([str(e) for e in parser.entities]) + return remove_surrogates(parser.text.strip()), parser.entities + except Exception: + log.exception("Failed to convert Matrix format:\nhtml=%s", html) + + +def matrix_reply_to_telegram(content, tg_space, room_id=None): + try: + reply = content["m.relates_to"]["m.in_reply_to"] + room_id = room_id or reply["room_id"] + event_id = reply["event_id"] + + try: + if content["format"] == "org.matrix.custom.html": + content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"]) + except KeyError: + pass + content["body"] = trim_reply_fallback_text(content["body"]) + + message = DBMessage.query.filter(DBMessage.mxid == event_id, + DBMessage.tg_space == tg_space, + DBMessage.mx_room == room_id).one_or_none() + if message: + return message.tgid + except KeyError: + pass + return None + + def matrix_text_to_telegram(text): text = command_regex.sub(r"\1/\2", text) if should_bridge_plaintext_highlights: @@ -255,52 +318,6 @@ def plain_mention_to_text(): return entities, replacer -def plain_mention_to_html(match): - puppet = pu.Puppet.find_by_displayname(match.group(2)) - if puppet: - return (f"{match.group(1)}" - f"" - f"{puppet.displayname}" - "") - return "".join(match.groups()) - - -def matrix_to_telegram(html): - try: - parser = MatrixParser() - html = html.replace("\n", "") - html = command_regex.sub(r"\1\2", html) - if should_bridge_plaintext_highlights: - html = plain_mention_regex.sub(plain_mention_to_html, html) - parser.feed(add_surrogates(html)) - return remove_surrogates(parser.text.strip()), parser.entities - except Exception: - log.exception("Failed to convert Matrix format:\nhtml=%s", html) - - -def matrix_reply_to_telegram(content, tg_space, room_id=None): - try: - reply = content["m.relates_to"]["m.in_reply_to"] - room_id = room_id or reply["room_id"] - event_id = reply["event_id"] - - try: - if content["format"] == "org.matrix.custom.html": - content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"]) - except KeyError: - pass - content["body"] = trim_reply_fallback_text(content["body"]) - - message = DBMessage.query.filter(DBMessage.mxid == event_id, - DBMessage.tg_space == tg_space, - DBMessage.mx_room == room_id).one_or_none() - if message: - return message.tgid - except KeyError: - pass - return None - - def init_mx(context): global plain_mention_regex, should_bridge_plaintext_highlights config = context.config diff --git a/mautrix_telegram/formatter/from_telegram.py b/mautrix_telegram/formatter/from_telegram.py index 5ed2865d..7a2bf442 100644 --- a/mautrix_telegram/formatter/from_telegram.py +++ b/mautrix_telegram/formatter/from_telegram.py @@ -132,7 +132,7 @@ async def _add_reply_header(source, text, html, evt, relates_to, main_intent, is r_keyword = "In reply to" if not is_edit else "Edit to" r_msg_link = f"{r_keyword}" - html = (f"
{r_msg_link} {r_sender_link} {r_html_body}
" + html = (f"
{r_msg_link} {r_sender_link}\n{r_html_body}
" + (html or escape(text))) lines = r_text_body.strip().split("\n") diff --git a/mautrix_telegram/portal.py b/mautrix_telegram/portal.py index 0c5ed860..0c248de0 100644 --- a/mautrix_telegram/portal.py +++ b/mautrix_telegram/portal.py @@ -594,7 +594,7 @@ class Portal: entity.user_id = await client.get_input_entity(entity.user_id.user_id) else: message, entities = formatter.matrix_text_to_telegram(message["body"]) - return await client.send_message(self.peer, message, reply_to=reply_to) + return await client.send_message(self.peer, message, entities=entities, reply_to=reply_to) async def _handle_matrix_file(self, client, message, reply_to): file = await self.main_intent.download_file(message["url"])