Fix Matrix->Telegram name mentions

This commit is contained in:
Tulir Asokan
2020-11-07 16:01:21 +02:00
parent 04e2497dd3
commit be59d50678
3 changed files with 77 additions and 72 deletions
+1 -2
View File
@@ -1,5 +1,4 @@
from .from_matrix import (matrix_reply_to_telegram, matrix_to_telegram, matrix_text_to_telegram,
init_mx)
from .from_matrix import matrix_reply_to_telegram, matrix_to_telegram, init_mx
from .from_telegram import telegram_reply_to_matrix, telegram_to_matrix
from .. import context as c
@@ -18,10 +18,12 @@ import re
import logging
from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic,
TypeMessageEntity)
TypeMessageEntity, InputMessageEntityMentionName)
from telethon.helpers import add_surrogate, del_surrogate
from telethon import TelegramClient
from mautrix.types import RoomID, MessageEventContent
from mautrix.util.logging import TraceLogger
from ... import puppet as pu
from ...types import TelegramID
@@ -31,30 +33,19 @@ from .parser import ParsedMessage, parse_html
if TYPE_CHECKING:
from ...context import Context
log: logging.Logger = logging.getLogger("mau.fmt.mx")
log: TraceLogger = logging.getLogger("mau.fmt.mx")
should_bridge_plaintext_highlights: bool = False
command_regex: Pattern = re.compile(r"^!([A-Za-z0-9@]+)")
not_command_regex: Pattern = re.compile(r"^\\(![A-Za-z0-9@]+)")
plain_mention_regex: Optional[Pattern] = None
def plain_mention_to_html(match: Match) -> str:
puppet = pu.Puppet.find_by_displayname(match.group(2))
if puppet:
return (f"{match.group(1)}"
f"<a href='https://matrix.to/#/{puppet.mxid}'>"
f"{puppet.displayname}"
"</a>")
return "".join(match.groups())
MAX_LENGTH = 4096
CUTOFF_TEXT = " [message cut]"
CUT_MAX_LENGTH = MAX_LENGTH - len(CUTOFF_TEXT)
def cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage:
def _cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage:
if len(message) > MAX_LENGTH:
message = message[0:CUT_MAX_LENGTH] + CUTOFF_TEXT
new_entities = []
@@ -73,23 +64,6 @@ class FormatError(Exception):
pass
def matrix_to_telegram(html: str) -> ParsedMessage:
try:
html = command_regex.sub(r"<command>\1</command>", html)
html = html.replace("\t", " " * 4)
html = not_command_regex.sub(r"\1", html)
if should_bridge_plaintext_highlights:
html = plain_mention_regex.sub(plain_mention_to_html, html)
text, entities = parse_html(add_surrogate(html))
text = del_surrogate(text.strip())
text, entities = cut_long_message(text, entities)
return text, entities
except Exception as e:
raise FormatError(f"Failed to convert Matrix format: {html}") from e
def matrix_reply_to_telegram(content: MessageEventContent, tg_space: TelegramID,
room_id: Optional[RoomID] = None) -> Optional[TelegramID]:
event_id = content.get_reply_to()
@@ -103,19 +77,61 @@ def matrix_reply_to_telegram(content: MessageEventContent, tg_space: TelegramID,
return None
def matrix_text_to_telegram(text: str) -> ParsedMessage:
async def matrix_to_telegram(client: TelegramClient, *, text: Optional[str] = None,
html: Optional[str] = None) -> ParsedMessage:
if html is not None:
text, entities = _matrix_html_to_telegram(html)
elif text is not None:
text, entities = _matrix_text_to_telegram(text)
else:
raise ValueError("text or html must be provided to convert formatting")
await _fix_name_mentions(client, entities)
return text, entities
def _matrix_html_to_telegram(html: str) -> ParsedMessage:
try:
html = command_regex.sub(r"<command>\1</command>", html)
html = html.replace("\t", " " * 4)
html = not_command_regex.sub(r"\1", html)
if should_bridge_plaintext_highlights:
html = plain_mention_regex.sub(_plain_mention_to_html, html)
text, entities = parse_html(add_surrogate(html))
text = del_surrogate(text.strip())
text, entities = _cut_long_message(text, entities)
return text, entities
except Exception as e:
raise FormatError(f"Failed to convert Matrix format: {html}") from e
def _matrix_text_to_telegram(text: str) -> ParsedMessage:
text = command_regex.sub(r"/\1", text)
text = text.replace("\t", " " * 4)
text = not_command_regex.sub(r"\1", text)
if should_bridge_plaintext_highlights:
entities, pmr_replacer = plain_mention_to_text()
entities, pmr_replacer = _plain_mention_to_text()
text = plain_mention_regex.sub(pmr_replacer, text)
else:
entities = []
return text, entities
def plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match], str]]:
async def _fix_name_mentions(client: TelegramClient, entities: List[TypeMessageEntity]) -> None:
for index in reversed(range(len(entities))):
entity = entities[index]
if isinstance(entity, (MessageEntityMentionName, InputMessageEntityMentionName)):
try:
user = await client.get_input_entity(entity.user_id)
except (ValueError, TypeError) as e:
log.trace(f"Dropping mention of {entity.user_id}: {e}")
del entities[index]
else:
entities[index] = InputMessageEntityMentionName(entity.offset, entity.length, user)
def _plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match], str]]:
entities = []
def replacer(match: Match) -> str:
@@ -136,6 +152,16 @@ def plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match],
return entities, replacer
def _plain_mention_to_html(match: Match) -> str:
puppet = pu.Puppet.find_by_displayname(match.group(2))
if puppet:
return (f"{match.group(1)}"
f"<a href='https://matrix.to/#/{puppet.mxid}'>"
f"{puppet.displayname}"
"</a>")
return "".join(match.groups())
def init_mx(context: "Context") -> None:
global plain_mention_regex, should_bridge_plaintext_highlights
config = context.config
+16 -36
View File
@@ -13,7 +13,7 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Awaitable, Dict, List, Optional, Tuple, Union, Any, TYPE_CHECKING
from typing import Awaitable, Dict, Optional, Union, Any, TYPE_CHECKING
from html import escape as escape_html
from string import Template
from abc import ABC
@@ -28,11 +28,11 @@ from telethon.errors import (ChatNotModifiedError, PhotoExtInvalidError,
PhotoInvalidDimensionsError, PhotoSaveFileInvalidError,
RPCError)
from telethon.tl.patched import Message, MessageService
from telethon.tl.types import (
DocumentAttributeFilename, DocumentAttributeImageSize, GeoPoint,
InputChatUploadedPhoto, MessageActionChatEditPhoto, MessageMediaGeo,
SendMessageCancelAction, SendMessageTypingAction, TypeInputPeer, TypeMessageEntity,
UpdateNewMessage, InputMediaUploadedDocument, InputMediaUploadedPhoto)
from telethon.tl.types import (DocumentAttributeFilename, DocumentAttributeImageSize, GeoPoint,
InputChatUploadedPhoto, MessageActionChatEditPhoto, MessageMediaGeo,
SendMessageCancelAction, SendMessageTypingAction, TypeInputPeer,
UpdateNewMessage, InputMediaUploadedDocument,
InputMediaUploadedPhoto)
from mautrix.types import (EventID, RoomID, UserID, ContentURI, MessageType, MessageEventContent,
TextMessageEventContent, MediaMessageEventContent, Format,
@@ -87,7 +87,7 @@ class PortalMatrix(BasePortal, ABC):
message = await self._get_state_change_message(event, user, **kwargs)
if not message:
return
message, entities = formatter.matrix_to_telegram(message)
message, entities = await formatter.matrix_to_telegram(self.bot.client, html=message)
response = await self.bot.client.send_message(self.peer, message,
formatting_entities=entities)
space = self.tgid if self.peer_type == "channel" else self.bot.tgid
@@ -214,27 +214,11 @@ class PortalMatrix(BasePortal, ABC):
elif content.msgtype == MessageType.EMOTE:
await self._apply_emote_format(sender, content)
@staticmethod
def _matrix_event_to_entities(event: Union[str, MessageEventContent]
) -> Tuple[str, Optional[List[TypeMessageEntity]]]:
try:
if isinstance(event, str):
message, entities = formatter.matrix_to_telegram(event)
elif isinstance(event, TextMessageEventContent) and event.format == Format.HTML:
message, entities = formatter.matrix_to_telegram(event.formatted_body)
else:
message, entities = formatter.matrix_text_to_telegram(event.body)
except KeyError:
message, entities = None, None
return message, entities
async def _handle_matrix_text(self, sender_id: TelegramID, event_id: EventID,
space: TelegramID, client: 'MautrixTelegramClient',
content: TextMessageEventContent, reply_to: TelegramID) -> None:
if content.formatted_body and content.format == Format.HTML:
message, entities = formatter.matrix_to_telegram(content.formatted_body)
else:
message, entities = formatter.matrix_text_to_telegram(content.body)
message, entities = await formatter.matrix_to_telegram(client, text=content.body,
html=content.formatted(Format.HTML))
async with self.send_lock(sender_id):
lp = self.get_config("telegram_link_preview")
if content.get_edit():
@@ -301,25 +285,21 @@ class PortalMatrix(BasePortal, ABC):
media = InputMediaUploadedDocument(file=file_handle, attributes=attributes,
mime_type=mime or "application/octet-stream")
if caption:
if caption.formatted_body and caption.format == Format.HTML:
caption, entities = formatter.matrix_to_telegram(caption.formatted_body)
else:
caption, entities = formatter.matrix_text_to_telegram(caption.body)
else:
caption, entities = None, None
capt, entities = (await formatter.matrix_to_telegram(client, text=caption.body,
html=caption.formatted(Format.HTML))
if caption else (None, None))
async with self.send_lock(sender_id):
if await self._matrix_document_edit(client, content, space, caption, media, event_id):
if await self._matrix_document_edit(client, content, space, capt, media, event_id):
return
try:
response = await client.send_media(self.peer, media, reply_to=reply_to,
caption=caption, entities=entities)
caption=capt, entities=entities)
except (PhotoInvalidDimensionsError, PhotoSaveFileInvalidError, PhotoExtInvalidError):
media = InputMediaUploadedDocument(file=media.file, mime_type=mime,
attributes=attributes)
response = await client.send_media(self.peer, media, reply_to=reply_to,
caption=caption, entities=entities)
caption=capt, entities=entities)
self._add_telegram_message_to_db(event_id, space, 0, response)
await self._send_delivery_receipt(event_id)
@@ -346,7 +326,7 @@ class PortalMatrix(BasePortal, ABC):
except (KeyError, ValueError):
self.log.exception("Failed to parse location")
return None
caption, entities = formatter.matrix_text_to_telegram(content.body)
caption, entities = await formatter.matrix_to_telegram(client, text=content.body)
media = MessageMediaGeo(geo=GeoPoint(lat, long, access_hash=0))
async with self.send_lock(sender_id):