diff --git a/mautrix_telegram/formatter/__init__.py b/mautrix_telegram/formatter/__init__.py index ed3066eb..2978ed01 100644 --- a/mautrix_telegram/formatter/__init__.py +++ b/mautrix_telegram/formatter/__init__.py @@ -1,5 +1,4 @@ -from .from_matrix import (matrix_reply_to_telegram, matrix_to_telegram, matrix_text_to_telegram, - init_mx) +from .from_matrix import matrix_reply_to_telegram, matrix_to_telegram, init_mx from .from_telegram import telegram_reply_to_matrix, telegram_to_matrix from .. import context as c diff --git a/mautrix_telegram/formatter/from_matrix/__init__.py b/mautrix_telegram/formatter/from_matrix/__init__.py index 4cac62b4..1fb6af1b 100644 --- a/mautrix_telegram/formatter/from_matrix/__init__.py +++ b/mautrix_telegram/formatter/from_matrix/__init__.py @@ -18,10 +18,12 @@ import re import logging from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic, - TypeMessageEntity) + TypeMessageEntity, InputMessageEntityMentionName) from telethon.helpers import add_surrogate, del_surrogate +from telethon import TelegramClient from mautrix.types import RoomID, MessageEventContent +from mautrix.util.logging import TraceLogger from ... 
import puppet as pu from ...types import TelegramID @@ -31,30 +33,19 @@ from .parser import ParsedMessage, parse_html if TYPE_CHECKING: from ...context import Context -log: logging.Logger = logging.getLogger("mau.fmt.mx") +log: TraceLogger = logging.getLogger("mau.fmt.mx") should_bridge_plaintext_highlights: bool = False command_regex: Pattern = re.compile(r"^!([A-Za-z0-9@]+)") not_command_regex: Pattern = re.compile(r"^\\(![A-Za-z0-9@]+)") plain_mention_regex: Optional[Pattern] = None - -def plain_mention_to_html(match: Match) -> str: - puppet = pu.Puppet.find_by_displayname(match.group(2)) - if puppet: - return (f"{match.group(1)}" - f"" - f"{puppet.displayname}" - "") - return "".join(match.groups()) - - MAX_LENGTH = 4096 CUTOFF_TEXT = " [message cut]" CUT_MAX_LENGTH = MAX_LENGTH - len(CUTOFF_TEXT) -def cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage: +def _cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage: if len(message) > MAX_LENGTH: message = message[0:CUT_MAX_LENGTH] + CUTOFF_TEXT new_entities = [] @@ -73,23 +64,6 @@ class FormatError(Exception): pass -def matrix_to_telegram(html: str) -> ParsedMessage: - try: - html = command_regex.sub(r"\1", html) - html = html.replace("\t", " " * 4) - html = not_command_regex.sub(r"\1", html) - if should_bridge_plaintext_highlights: - html = plain_mention_regex.sub(plain_mention_to_html, html) - - text, entities = parse_html(add_surrogate(html)) - text = del_surrogate(text.strip()) - text, entities = cut_long_message(text, entities) - - return text, entities - except Exception as e: - raise FormatError(f"Failed to convert Matrix format: {html}") from e - - def matrix_reply_to_telegram(content: MessageEventContent, tg_space: TelegramID, room_id: Optional[RoomID] = None) -> Optional[TelegramID]: event_id = content.get_reply_to() @@ -103,19 +77,61 @@ def matrix_reply_to_telegram(content: MessageEventContent, tg_space: TelegramID, return None -def 
async def matrix_to_telegram(client: TelegramClient, *, text: Optional[str] = None,
                             html: Optional[str] = None) -> ParsedMessage:
    """Convert Matrix message content into Telegram text plus formatting entities.

    ``html`` takes precedence: ``text`` is only converted when ``html`` is None.
    Once converted, name-mention entities are resolved through ``client``
    (see ``_fix_name_mentions``) before the result is returned.

    :param client: Telegram client used to resolve mentioned users.
    :param text: Plain-text body of the Matrix message.
    :param html: HTML-formatted body of the Matrix message.
    :return: The converted ``(text, entities)`` pair.
    :raises ValueError: If neither ``text`` nor ``html`` is provided.
    :raises FormatError: If converting ``html`` fails.
    """
    if html is None and text is None:
        raise ValueError("text or html must be provided to convert formatting")
    if html is not None:
        converted = _matrix_html_to_telegram(html)
    else:
        converted = _matrix_text_to_telegram(text)
    message, entities = converted
    await _fix_name_mentions(client, entities)
    return message, entities


def _matrix_html_to_telegram(html: str) -> ParsedMessage:
    """Convert a Matrix HTML body into Telegram text and entities.

    Any exception raised along the way is re-raised as ``FormatError`` with the
    original exception chained as its cause.
    """
    try:
        # ``html`` is rebound step by step on purpose: the error message below
        # reports the markup as it looked when the failure happened.
        # NOTE(review): unlike the plain-text path, this replacement adds no "/"
        # prefix to the command name -- confirm the replacement string is intact.
        html = command_regex.sub(r"\1", html)
        html = html.replace("\t", " " * 4)
        # Drop the backslash that escapes a leading "!command".
        html = not_command_regex.sub(r"\1", html)
        if should_bridge_plaintext_highlights:
            html = plain_mention_regex.sub(_plain_mention_to_html, html)

        parsed_text, parsed_entities = parse_html(add_surrogate(html))
        stripped = del_surrogate(parsed_text.strip())
        message, entities = _cut_long_message(stripped, parsed_entities)
        return message, entities
    except Exception as e:
        raise FormatError(f"Failed to convert Matrix format: {html}") from e


def _matrix_text_to_telegram(text: str) -> ParsedMessage:
    """Convert a plain-text Matrix body into Telegram text and entities.

    A leading ``!command`` becomes ``/command``, tabs become four spaces and a
    backslash escaping a leading ``!command`` is removed. Entities are only
    produced when plaintext highlight bridging is enabled.
    """
    as_command = command_regex.sub(r"/\1", text)
    without_tabs = as_command.replace("\t", " " * 4)
    plain = not_command_regex.sub(r"\1", without_tabs)
    if not should_bridge_plaintext_highlights:
        return plain, []
    entities, replace_mention = _plain_mention_to_text()
    return plain_mention_regex.sub(replace_mention, plain), entities


async def _fix_name_mentions(client: TelegramClient, entities: List[TypeMessageEntity]) -> None:
    """Resolve name-mention entities in ``entities`` in place.

    Each ``MessageEntityMentionName`` / ``InputMessageEntityMentionName`` is
    replaced with an ``InputMessageEntityMentionName`` that carries the result
    of ``client.get_input_entity`` for its user ID. Mentions whose lookup raises
    ``ValueError`` or ``TypeError`` are removed from the list.
    """
    mention_types = (MessageEntityMentionName, InputMessageEntityMentionName)
    # Walk from the end so deleting an item never shifts an index still to visit.
    for position in range(len(entities) - 1, -1, -1):
        current = entities[position]
        if not isinstance(current, mention_types):
            continue
        try:
            peer = await client.get_input_entity(current.user_id)
        except (ValueError, TypeError) as e:
            log.trace(f"Dropping mention of {current.user_id}: {e}")
            del entities[position]
            continue
        entities[position] = InputMessageEntityMentionName(current.offset, current.length, peer)


def _plain_mention_to_html(match: Match) -> str:
    """Regex substitution callback for plaintext mentions in HTML bodies.

    Looks up a puppet by the display name in group 2. When one is found the
    result is group 1 followed by the puppet's display name; otherwise all
    matched groups are joined back together unchanged.
    """
    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())
    # NOTE(review): the expression this was rewritten from had two empty string
    # fragments around the display name, so no markup is emitted here. That
    # looks like lost link markup -- confirm against the upstream file.
    prefix = match.group(1)
    return f"{prefix}{puppet.displayname}"
-from typing import Awaitable, Dict, List, Optional, Tuple, Union, Any, TYPE_CHECKING +from typing import Awaitable, Dict, Optional, Union, Any, TYPE_CHECKING from html import escape as escape_html from string import Template from abc import ABC @@ -28,11 +28,11 @@ from telethon.errors import (ChatNotModifiedError, PhotoExtInvalidError, PhotoInvalidDimensionsError, PhotoSaveFileInvalidError, RPCError) from telethon.tl.patched import Message, MessageService -from telethon.tl.types import ( - DocumentAttributeFilename, DocumentAttributeImageSize, GeoPoint, - InputChatUploadedPhoto, MessageActionChatEditPhoto, MessageMediaGeo, - SendMessageCancelAction, SendMessageTypingAction, TypeInputPeer, TypeMessageEntity, - UpdateNewMessage, InputMediaUploadedDocument, InputMediaUploadedPhoto) +from telethon.tl.types import (DocumentAttributeFilename, DocumentAttributeImageSize, GeoPoint, + InputChatUploadedPhoto, MessageActionChatEditPhoto, MessageMediaGeo, + SendMessageCancelAction, SendMessageTypingAction, TypeInputPeer, + UpdateNewMessage, InputMediaUploadedDocument, + InputMediaUploadedPhoto) from mautrix.types import (EventID, RoomID, UserID, ContentURI, MessageType, MessageEventContent, TextMessageEventContent, MediaMessageEventContent, Format, @@ -87,7 +87,7 @@ class PortalMatrix(BasePortal, ABC): message = await self._get_state_change_message(event, user, **kwargs) if not message: return - message, entities = formatter.matrix_to_telegram(message) + message, entities = await formatter.matrix_to_telegram(self.bot.client, html=message) response = await self.bot.client.send_message(self.peer, message, formatting_entities=entities) space = self.tgid if self.peer_type == "channel" else self.bot.tgid @@ -214,27 +214,11 @@ class PortalMatrix(BasePortal, ABC): elif content.msgtype == MessageType.EMOTE: await self._apply_emote_format(sender, content) - @staticmethod - def _matrix_event_to_entities(event: Union[str, MessageEventContent] - ) -> Tuple[str, 
Optional[List[TypeMessageEntity]]]: - try: - if isinstance(event, str): - message, entities = formatter.matrix_to_telegram(event) - elif isinstance(event, TextMessageEventContent) and event.format == Format.HTML: - message, entities = formatter.matrix_to_telegram(event.formatted_body) - else: - message, entities = formatter.matrix_text_to_telegram(event.body) - except KeyError: - message, entities = None, None - return message, entities - async def _handle_matrix_text(self, sender_id: TelegramID, event_id: EventID, space: TelegramID, client: 'MautrixTelegramClient', content: TextMessageEventContent, reply_to: TelegramID) -> None: - if content.formatted_body and content.format == Format.HTML: - message, entities = formatter.matrix_to_telegram(content.formatted_body) - else: - message, entities = formatter.matrix_text_to_telegram(content.body) + message, entities = await formatter.matrix_to_telegram(client, text=content.body, + html=content.formatted(Format.HTML)) async with self.send_lock(sender_id): lp = self.get_config("telegram_link_preview") if content.get_edit(): @@ -301,25 +285,21 @@ class PortalMatrix(BasePortal, ABC): media = InputMediaUploadedDocument(file=file_handle, attributes=attributes, mime_type=mime or "application/octet-stream") - if caption: - if caption.formatted_body and caption.format == Format.HTML: - caption, entities = formatter.matrix_to_telegram(caption.formatted_body) - else: - caption, entities = formatter.matrix_text_to_telegram(caption.body) - else: - caption, entities = None, None + capt, entities = (await formatter.matrix_to_telegram(client, text=caption.body, + html=caption.formatted(Format.HTML)) + if caption else (None, None)) async with self.send_lock(sender_id): - if await self._matrix_document_edit(client, content, space, caption, media, event_id): + if await self._matrix_document_edit(client, content, space, capt, media, event_id): return try: response = await client.send_media(self.peer, media, reply_to=reply_to, - 
caption=caption, entities=entities) + caption=capt, entities=entities) except (PhotoInvalidDimensionsError, PhotoSaveFileInvalidError, PhotoExtInvalidError): media = InputMediaUploadedDocument(file=media.file, mime_type=mime, attributes=attributes) response = await client.send_media(self.peer, media, reply_to=reply_to, - caption=caption, entities=entities) + caption=capt, entities=entities) self._add_telegram_message_to_db(event_id, space, 0, response) await self._send_delivery_receipt(event_id) @@ -346,7 +326,7 @@ class PortalMatrix(BasePortal, ABC): except (KeyError, ValueError): self.log.exception("Failed to parse location") return None - caption, entities = formatter.matrix_text_to_telegram(content.body) + caption, entities = await formatter.matrix_to_telegram(client, text=content.body) media = MessageMediaGeo(geo=GeoPoint(lat, long, access_hash=0)) async with self.send_lock(sender_id):