# -*- coding: future_fstrings -*- # mautrix-telegram - A Matrix-Telegram puppeting bridge # Copyright (C) 2018 Tulir Asokan # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . from typing import Optional, List, Tuple, TYPE_CHECKING from html import escape import logging import re from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityEmail, MessageEntityUrl, MessageEntityTextUrl, MessageEntityBold, MessageEntityItalic, MessageEntityCode, MessageEntityPre, MessageEntityBotCommand, Message, PeerChannel, MessageEntityHashtag, TypeMessageEntity, MessageFwdHeader, PeerUser) from mautrix_appservice import MatrixRequestError from mautrix_appservice.intent_api import IntentAPI from .. import user as u, puppet as pu, portal as po from ..db import Message as DBMessage from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html, trim_reply_fallback_text, unicode_to_html) if TYPE_CHECKING: from ..abstract_user import AbstractUser from ..context import Context try: from lxml.html.diff import htmldiff except ImportError: htmldiff = None # type: function log = logging.getLogger("mau.fmt.tg") # type: logging.Logger should_highlight_edits = False # type: bool def telegram_reply_to_matrix(evt: Message, source: "AbstractUser") -> dict: if evt.reply_to_msg_id: space = (evt.to_id.channel_id if isinstance(evt, Message) and isinstance(evt.to_id, PeerChannel) else source.tgid) msg = DBMessage.query.get((evt.reply_to_msg_id, space)) if msg: return { "m.in_reply_to": { "event_id": msg.mxid, "room_id": msg.mx_room, } } return {} async def _add_forward_header(source, text: str, html: Optional[str], fwd_from: MessageFwdHeader) -> Tuple[str, str]: if not html: html = escape(text) fwd_from_html, fwd_from_text = None, None if fwd_from.from_id: user = u.User.get_by_tgid(fwd_from.from_id) if user: fwd_from_text = user.displayname or user.mxid fwd_from_html = f"{fwd_from_text}" if not fwd_from_text: puppet = pu.Puppet.get(fwd_from.from_id, create=False) if puppet and puppet.displayname: fwd_from_text = puppet.displayname or puppet.mxid fwd_from_html = f"{fwd_from_text}" if not fwd_from_text: user = await source.client.get_entity(PeerUser(fwd_from.from_id)) if user: fwd_from_text = pu.Puppet.get_displayname(user, False) fwd_from_html = f"{fwd_from_text}" if not fwd_from_text: if fwd_from.from_id: fwd_from_text = "Unknown user" else: fwd_from_text = "Unknown source" fwd_from_html = f"{fwd_from_text}" text = "\n".join([f"> {line}" for line in text.split("\n")]) text = f"Forwarded from {fwd_from_text}:\n{text}" html = (f"Forwarded message from {fwd_from_html}
" f"
{html}
") return text, html def highlight_edits(new_html: str, old_html: str) -> str: # Don't include `Edit:` text in diff. if old_html.startswith("Edit: "): old_html = old_html[len("Edit: "):] # Generate diff with lxml new_html = htmldiff(old_html, new_html) # Replace with since Riot doesn't allow new_html = new_html.replace("", "").replace("", "") # Remove s since we just want to hide deletions. new_html = re.sub(".+?", "", new_html) return new_html async def _add_reply_header(source: "AbstractUser", text: str, html: str, evt: Message, relates_to: dict, main_intent: IntentAPI, is_edit: bool ) -> Tuple[str, str]: space = (evt.to_id.channel_id if isinstance(evt, Message) and isinstance(evt.to_id, PeerChannel) else source.tgid) msg = DBMessage.query.get((evt.reply_to_msg_id, space)) if not msg: return text, html relates_to["m.in_reply_to"] = { "event_id": msg.mxid, "room_id": msg.mx_room, } try: event = await main_intent.get_event(msg.mx_room, msg.mxid) content = event["content"] r_sender = event["sender"] r_text_body = trim_reply_fallback_text(content["body"]) r_html_body = trim_reply_fallback_html(content["formatted_body"] if "formatted_body" in content else escape(content["body"])) puppet = pu.Puppet.get_by_mxid(r_sender, create=False) r_displayname = puppet.displayname if puppet else r_sender r_sender_link = f"{r_displayname}" if is_edit and should_highlight_edits: html = highlight_edits(html or escape(text), r_html_body) except (ValueError, KeyError, MatrixRequestError): r_sender_link = "unknown user" r_displayname = "unknown user" r_text_body = "Failed to fetch message" r_html_body = "Failed to fetch message" if is_edit: html = f"Edit: {html or escape(text)}" text = f"Edit: {text}" r_keyword = "In reply to" if not is_edit else "Edit to" r_msg_link = f"{r_keyword}" html = ( f"
{r_msg_link} {r_sender_link}\n{r_html_body}
" + (html or escape(text))) lines = r_text_body.strip().split("\n") text_with_quote = f"> <{r_displayname}> {lines.pop(0)}" for line in lines: if line: text_with_quote += f"\n> {line}" text_with_quote += "\n\n" text_with_quote += text return text_with_quote, html async def telegram_to_matrix(evt: Message, source: "AbstractUser", main_intent: Optional[IntentAPI] = None, is_edit: bool = False, prefix_text: Optional[str] = None, prefix_html: Optional[str] = None) -> Tuple[str, str, dict]: text = add_surrogates(evt.message) html = _telegram_entities_to_matrix_catch(text, evt.entities) if evt.entities else None relates_to = {} if prefix_html: html = prefix_html + (html or escape(text)) if prefix_text: text = prefix_text + text if evt.fwd_from: text, html = await _add_forward_header(source, text, html, evt.fwd_from) if evt.reply_to_msg_id: text, html = await _add_reply_header(source, text, html, evt, relates_to, main_intent, is_edit) if isinstance(evt, Message) and evt.post and evt.post_author: if not html: html = escape(text) text += f"\n- {evt.post_author}" html += f"
- {evt.post_author}" html = unicode_to_html(text, html, "\u0336", "del") html = unicode_to_html(text, html, "\u0332", "u") if html: html = html.replace("\n", "
") return remove_surrogates(text), remove_surrogates(html), relates_to def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEntity]) -> str: try: return _telegram_entities_to_matrix(text, entities) except Exception: log.exception("Failed to convert Telegram format:\n" "message=%s\n" "entities=%s", text, entities) def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str: if not entities: return text html = [] last_offset = 0 for entity in entities: if entity.offset > last_offset: html.append(escape(text[last_offset:entity.offset])) elif entity.offset < last_offset: continue skip_entity = False entity_text = escape(text[entity.offset:entity.offset + entity.length]) entity_type = type(entity) if entity_type == MessageEntityBold: html.append(f"{entity_text}") elif entity_type == MessageEntityItalic: html.append(f"{entity_text}") elif entity_type == MessageEntityCode: html.append(f"{entity_text}") elif entity_type == MessageEntityPre: skip_entity = _parse_pre(html, entity_text, entity.language) elif entity_type == MessageEntityMention: skip_entity = _parse_mention(html, entity_text) elif entity_type == MessageEntityMentionName: skip_entity = _parse_name_mention(html, entity_text, entity.user_id) elif entity_type == MessageEntityEmail: html.append(f"{entity_text}") elif entity_type in {MessageEntityTextUrl, MessageEntityUrl}: skip_entity = _parse_url(html, entity_text, entity.url if entity_type == MessageEntityTextUrl else None) elif entity_type == MessageEntityBotCommand: html.append(f"!{entity_text[1:]}") elif entity_type == MessageEntityHashtag: html.append(f"{entity_text}") else: skip_entity = True last_offset = entity.offset + (0 if skip_entity else entity.length) html.append(text[last_offset:]) return "".join(html) def _parse_pre(html: List[str], entity_text: str, language: str) -> bool: if language: html.append("
"
                    f"{entity_text}"
                    "
") else: html.append(f"
{entity_text}
") return False def _parse_mention(html: List[str], entity_text: str) -> bool: username = entity_text[1:] user = u.User.find_by_username(username) or pu.Puppet.find_by_username(username) if user: mxid = user.mxid else: portal = po.Portal.find_by_username(username) mxid = portal.alias or portal.mxid if portal else None if mxid: html.append(f"{entity_text}") else: return True return False def _parse_name_mention(html: List[str], entity_text: str, user_id: int) -> bool: user = u.User.get_by_tgid(user_id) if user: mxid = user.mxid else: puppet = pu.Puppet.get(user_id, create=False) mxid = puppet.mxid if puppet else None if mxid: html.append(f"{entity_text}") else: return True return False message_link_regex = re.compile( r"https?://t(?:elegram)?\.(?:me|dog)/([A-Za-z][A-Za-z0-9_]{3,}[A-Za-z0-9])/([0-9]{1,50})") def _parse_url(html: List[str], entity_text: str, url: str) -> bool: url = escape(url) if url else entity_text if not url.startswith(("https://", "http://", "ftp://", "magnet://")): url = "http://" + url message_link_match = message_link_regex.match(url) if message_link_match: group, msgid = message_link_match.groups() msgid = int(msgid) portal = po.Portal.find_by_username(group) if portal: message = DBMessage.query.get((msgid, portal.tgid)) if message: url = f"https://matrix.to/#/{portal.mxid}/{message.mxid}" html.append(f"{entity_text}") return False def init_tg(context: "Context") -> None: global should_highlight_edits should_highlight_edits = htmldiff and context.config["bridge.highlight_edits"]