diff --git a/mautrix_telegram/formatter/from_matrix/parser.py b/mautrix_telegram/formatter/from_matrix/parser.py index bf9b44d1..162d12b8 100644 --- a/mautrix_telegram/formatter/from_matrix/parser.py +++ b/mautrix_telegram/formatter/from_matrix/parser.py @@ -18,15 +18,15 @@ from typing import List, Tuple, Pattern import re from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command, - MessageEntityMentionName as MentionName, MessageEntityEmail as Email, - MessageEntityUrl as URL, MessageEntityTextUrl as TextURL, + MessageEntityMentionName as MentionName, MessageEntityUrl as URL, + MessageEntityEmail as Email, MessageEntityTextUrl as TextURL, MessageEntityBold as Bold, MessageEntityItalic as Italic, MessageEntityCode as Code, MessageEntityPre as Pre, - TypeMessageEntity) + MessageEntityStrike as Strike, MessageEntityUnderline as Underline, + MessageEntityBlockquote as Blockquote, TypeMessageEntity) from ... import user as u, puppet as pu, portal as po from ...types import MatrixUserID -from ..util import html_to_unicode from .telegram_message import TelegramMessage, Entity, offset_length_multiply from .html_reader import HTMLNode, read_html @@ -101,13 +101,6 @@ class MatrixParser: children.append(child) return TelegramMessage.join(children, "\n") - @classmethod - def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: - msg = cls.tag_aware_parse_node(node, ctx) - children = msg.trim().split("\n") - children = [child.prepend("> ") for child in children] - return TelegramMessage.join(children, "\n") - @classmethod def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: children = cls.node_to_tmessages(node, ctx) @@ -122,15 +115,14 @@ class MatrixParser: msg.format(Bold) elif node.tag in ("i", "em"): msg.format(Italic) + elif node.tag in ("s", "strike", "del"): + msg.format(Strike) + elif node.tag in ("u", "ins"): + msg.format(Underline) + elif node.tag == "blockquote": 
+ msg.format(Blockquote) elif node.tag == "command": msg.format(Command) - elif node.tag in ("s", "strike", "del"): - msg.text = html_to_unicode(msg.text, "\u0336") - elif node.tag in ("u", "ins"): - msg.text = html_to_unicode(msg.text, "\u0332") - - if node.tag in ("s", "strike", "del", "u", "ins"): - msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2)) return msg @@ -171,9 +163,7 @@ class MatrixParser: @classmethod def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage: - if node.tag == "blockquote": - return cls.blockquote_to_tmessage(node, ctx) - elif node.tag == "ol": + if node.tag == "ol": return cls.list_to_tmessage(node, ctx) elif node.tag == "ul": return cls.list_to_tmessage(node, ctx.enter_list()) @@ -181,7 +171,8 @@ class MatrixParser: return cls.header_to_tmessage(node, ctx) elif node.tag == "br": return TelegramMessage("\n") - elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"): + elif node.tag in ("b", "strong", "i", "em", "s", "strike", "del", "u", "ins", + "blockquote", "command"): return cls.basic_format_to_tmessage(node, ctx) elif node.tag == "a": return cls.link_to_tstring(node, ctx) diff --git a/mautrix_telegram/formatter/from_telegram.py b/mautrix_telegram/formatter/from_telegram.py index ccbe82e7..62e9ff8a 100644 --- a/mautrix_telegram/formatter/from_telegram.py +++ b/mautrix_telegram/formatter/from_telegram.py @@ -24,7 +24,8 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M MessageEntityItalic, MessageEntityCode, MessageEntityPre, MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag, MessageEntityPhone, TypeMessageEntity, Message, PeerChannel, - MessageFwdHeader, PeerUser) + MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader, + MessageEntityUnderline, PeerUser) from mautrix_appservice import MatrixRequestError from mautrix_appservice.intent_api import IntentAPI @@ -33,7 +34,7 @@ from .. 
import user as u, puppet as pu, portal as po from ..types import TelegramID from ..db import Message as DBMessage from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, - trim_reply_fallback_text, unicode_to_html) + trim_reply_fallback_text) if TYPE_CHECKING: from ..abstract_user import AbstractUser @@ -194,9 +195,6 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser", text += f"\n- {evt.post_author}" html += f"
- {evt.post_author}" - html = unicode_to_html(text, html, "\u0336", "del") - html = unicode_to_html(text, html, "\u0332", "u") - if html: html = html.replace("\n", "
") @@ -214,29 +212,43 @@ def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEnti return "[failed conversion in _telegram_entities_to_matrix]" -def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str: +def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity], + offset: int = 0, length: int = None) -> str: if not entities: - return text + return escape(text) + if length is None: + length = len(text) html = [] last_offset = 0 - for entity in entities: - if entity.offset > last_offset: - html.append(escape(text[last_offset:entity.offset])) - elif entity.offset < last_offset: + for i, entity in enumerate(entities): + if entity.offset > offset + length: + break + relative_offset = entity.offset - offset + if relative_offset > last_offset: + html.append(escape(text[last_offset:relative_offset])) + elif relative_offset < last_offset: continue skip_entity = False - entity_text = escape(text[entity.offset:entity.offset + entity.length]) + entity_text = _telegram_entities_to_matrix( + text=text[relative_offset:relative_offset + entity.length], + entities=entities[i + 1:], offset=entity.offset, length=entity.length) entity_type = type(entity) if entity_type == MessageEntityBold: html.append(f"{entity_text}") elif entity_type == MessageEntityItalic: html.append(f"{entity_text}") + elif entity_type == MessageEntityUnderline: + html.append(f"{entity_text}") + elif entity_type == MessageEntityStrike: + html.append(f"{entity_text}") + elif entity_type == MessageEntityBlockquote: + html.append(f"
{entity_text}
") elif entity_type == MessageEntityCode: - html.append(("
{entity_text}
" - if "\n" in entity_text - else "{entity_text}").format(entity_text=entity_text)) + html.append(f"
{entity_text}
" + if "\n" in entity_text + else f"{entity_text}") elif entity_type == MessageEntityPre: skip_entity = _parse_pre(html, entity_text, entity.language) elif entity_type == MessageEntityMention: @@ -254,8 +266,8 @@ def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) - html.append(f"{entity_text}") else: skip_entity = True - last_offset = entity.offset + (0 if skip_entity else entity.length) - html.append(text[last_offset:]) + last_offset = relative_offset + (0 if skip_entity else entity.length) + html.append(escape(text[last_offset:])) return "".join(html) diff --git a/mautrix_telegram/formatter/util.py b/mautrix_telegram/formatter/util.py index b0456f51..4ac01284 100644 --- a/mautrix_telegram/formatter/util.py +++ b/mautrix_telegram/formatter/util.py @@ -20,38 +20,6 @@ import struct import re -def unicode_to_html(text: str, html: str, ctrl: str, tag: str) -> str: - if ctrl not in text: - return html - if not html: - html = escape(text) - tag_start = f"<{tag}>" - tag_end = f"" - characters = html.split(ctrl) - html = "" - in_tag = False - for char in characters: - if not in_tag: - if len(char) > 1: - html += char[0:-1] - char = char[-1] - html += tag_start - in_tag = True - html += char - else: - if len(char) > 1: - html += tag_end - in_tag = False - html += char - if in_tag: - html += tag_end - return html - - -def html_to_unicode(text: str, ctrl: str) -> str: - return ctrl.join(text) + ctrl - - # add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon. # Licensed under the MIT license. 
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py diff --git a/setup.py b/setup.py index df860716..0f757c09 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ extras = { "fast_crypto": ["cryptg>=0.1,<0.3"], "webp_convert": ["Pillow>=4.3.0,<7"], "hq_thumbnails": ["moviepy>=1.0,<2.0"], - "metrics": ["prometheus-client>=0.6.0,<0.7.0"], + "metrics": ["prometheus-client>=0.6.0,<0.8.0"], } extras["all"] = list({dep for deps in extras.values() for dep in deps}) @@ -38,7 +38,7 @@ setuptools.setup( "ruamel.yaml>=0.15.35,<0.16", "future-fstrings>=0.4.2", "python-magic>=0.4.15,<0.5", - "telethon>=1.7,<1.9", + "telethon>=1.9,<1.10", "telethon-session-sqlalchemy>=0.2.14,<0.3", ], extras_require=extras,