diff --git a/README.md b/README.md index 0a54ff4f..1e9921d3 100644 --- a/README.md +++ b/README.md @@ -54,9 +54,9 @@ does not do this automatically. ## Features & Roadmap * Matrix → Telegram * [x] Plaintext messages - * [ ] Formatted messages + * [x] Formatted messages * [ ] Bot commands (!command -> /command) - * [ ] Mentions + * [x] Mentions * [ ] Locations * [ ] Images * [ ] Files diff --git a/mautrix_telegram/__main__.py b/mautrix_telegram/__main__.py index 1111725a..8909a818 100644 --- a/mautrix_telegram/__main__.py +++ b/mautrix_telegram/__main__.py @@ -30,6 +30,7 @@ from .db import init as init_db from .user import init as init_user from .portal import init as init_portal from .puppet import init as init_puppet +from .formatter import init as init_formatter log = logging.getLogger("mau") time_formatter = logging.Formatter("[%(asctime)s] [%(levelname)s@%(name)s] %(message)s") @@ -75,6 +76,7 @@ context = (appserv, db, log, config) with appserv.run(config["appservice.hostname"], config["appservice.port"]) as start: init_db(db_factory) + init_formatter(context) init_portal(context) init_puppet(context) init_user(context) diff --git a/mautrix_telegram/formatter.py b/mautrix_telegram/formatter.py index c17f61fc..879ac111 100644 --- a/mautrix_telegram/formatter.py +++ b/mautrix_telegram/formatter.py @@ -15,11 +15,160 @@ # along with this program. If not, see . import re from html import escape, unescape +from html.parser import HTMLParser +from collections import deque from telethon.tl.types import * from . 
# Module logger; init() replaces it with the "formatter" child of the application logger.
log = None


class MatrixParser(HTMLParser):
    """Convert Matrix ``formatted_body`` HTML into Telegram text plus entities.

    Feed the HTML with :meth:`feed` and finish with :meth:`close`; afterwards
    ``text`` holds the plain message text and ``entities`` the completed
    Telegram message entities (bold, italic, code, pre, links, mentions).

    Lists are rendered as text: ``* item`` for ``<ul>`` and ``N. item`` for
    ``<ol>``, indented four spaces per nesting level.

    NOTE(review): entity offsets/lengths are counted in Python characters;
    Telegram presumably expects UTF-16 code units, which differ for astral
    characters such as most emoji - confirm against the Telegram API docs.
    """

    matrix_to_regex = re.compile(r"https://matrix\.to/#/(@.+)")

    # HTML void elements never receive an end tag, so they must not be pushed
    # onto the open-tag stack (otherwise every later pop removes the wrong tag).
    _void_tags = frozenset(("area", "base", "br", "col", "embed", "hr", "img", "input",
                            "link", "meta", "param", "source", "track", "wbr"))

    def __init__(self):
        # convert_charrefs=True (the default, made explicit): handle_data()
        # receives text whose character references are already decoded.
        super().__init__(convert_charrefs=True)
        self.text = ""
        self.entities = []
        # Entities whose end tag has not been seen yet, keyed by tag name.
        self._building_entities = {}
        self._list_counter = 0
        # Parallel stacks (left end = innermost tag). The meta slot holds:
        #   <ol>: number of the last emitted item
        #   <li>: truthy once the bullet/number prefix has been written
        #   <a>:  replacement text for the link body, or a falsy value
        self._open_tags = deque()
        self._open_tags_meta = deque()
        self._previous_ended_line = True

    def handle_starttag(self, tag, attrs):
        """Push ``tag`` on the stack and start the matching entity, if any."""
        if tag in self._void_tags:
            if tag == "br":
                self._append_line_break()
            return

        self._open_tags.appendleft(tag)
        self._open_tags_meta.appendleft(0)
        attrs = dict(attrs)
        entity_type = None
        args = {}
        if tag in ("strong", "b"):
            entity_type = MessageEntityBold
        elif tag in ("em", "i"):
            entity_type = MessageEntityItalic
        elif tag == "code":
            pre = self._building_entities.get("pre")
            if pre is None:
                entity_type = MessageEntityCode
            else:
                # <pre><code class="language-x">: annotate the enclosing
                # block instead of creating a nested inline-code entity.
                css_class = attrs.get("class") or ""
                if css_class.startswith("language-"):
                    pre.language = css_class[len("language-"):]
        elif tag == "pre":
            entity_type = MessageEntityPre
            args["language"] = ""
        elif tag == "a":
            url = attrs.get("href")
            if not url:
                return
            mention = self.matrix_to_regex.search(url)
            if mention:
                mxid = mention.group(1)
                puppet_match = p.Puppet.mxid_regex.search(mxid)
                if puppet_match:
                    user = p.Puppet.get(puppet_match.group(1), create=False)
                else:
                    user = u.User.get_by_mxid(mxid, create=False)
                if not user:
                    return
                if user.username:
                    entity_type = MessageEntityMention
                    url = f"@{user.username}"
                else:
                    entity_type = MessageEntityMentionName
                    args["user_id"] = user.tgid
                    # Keep the display text of the link; previously it was
                    # overwritten with the raw matrix.to URL.
                    url = None
            elif url.startswith("mailto:"):
                url = url[len("mailto:"):]
                entity_type = MessageEntityEmail
            else:
                # Whether this is a bare URL is only known once the link text
                # arrives; handle_data() downgrades it to MessageEntityUrl.
                entity_type = MessageEntityTextUrl
                args["url"] = url
                url = None
            self._open_tags_meta[0] = url

        if entity_type and tag not in self._building_entities:
            self._building_entities[tag] = entity_type(offset=len(self.text), length=0, **args)

    def _append_line_break(self):
        """Render ``<br>`` as a newline that belongs to every open entity."""
        self.text += "\n"
        self._previous_ended_line = True
        for entity in self._building_entities.values():
            entity.length += 1

    def _list_depth(self):
        """Return how many ``<ol>``/``<ul>`` elements are currently open."""
        return sum(1 for tag in self._open_tags if tag in ("ol", "ul"))

    def handle_data(self, text):
        """Append a text run, applying link replacement and list prefixes."""
        # No unescape() here: the base class already decoded character
        # references, and decoding twice turns "&amp;lt;" into "<".
        previous_tag = self._open_tags[0] if self._open_tags else ""
        list_format_offset = 0
        if previous_tag == "a":
            url = self._open_tags_meta[0]
            if url:
                text = url
            else:
                link = self._building_entities.get("a")
                if isinstance(link, MessageEntityTextUrl) and link.url == text:
                    # The visible text already is the URL: use a plain URL entity.
                    self._building_entities["a"] = MessageEntityUrl(offset=link.offset,
                                                                    length=link.length)
        elif (previous_tag == "li" and len(self._open_tags) > 1
              and self._previous_ended_line and not self._open_tags_meta[0]):
            list_type = self._open_tags[1]
            indent = (self._list_depth() - 1) * 4 * " "
            text = text.strip("\n")
            if not text:
                return
            prefix = ""
            if list_type == "ul":
                prefix = f"{indent}* "
            elif list_type == "ol":
                n = self._open_tags_meta[1] + 1
                self._open_tags_meta[1] = n
                prefix = f"{indent}{n}. "
            if prefix:
                text = prefix + text
                # len(prefix) stays correct for item numbers >= 10.
                list_format_offset = len(prefix)
                # One prefix per item, even if a <br> starts a new line in it.
                self._open_tags_meta[0] = True

        visible_length = len(text.strip("\n"))
        for entity in self._building_entities.values():
            entity.length += visible_length
            # NOTE(review): any entity still open here started before this
            # list item, so shifting its offset looks questionable - kept
            # as in the original pending confirmation.
            entity.offset += list_format_offset

        self._previous_ended_line = text.endswith("\n")
        self.text += text

    def handle_endtag(self, tag):
        """Pop the innermost open tag and finish the entity started by ``tag``."""
        if tag in self._void_tags:
            # "<br/>" arrives as start+end; nothing was pushed for it.
            return
        if self._open_tags:
            self._open_tags.popleft()
            self._open_tags_meta.popleft()
        if tag in ("ul", "ol") and self.text.endswith("\n"):
            self.text = self.text[:-1]
        entity = self._building_entities.pop(tag, None)
        if entity:
            self.entities.append(entity)


def _plain_text_fallback(html):
    """Best-effort plain text for ``html``: drop tags, decode character references."""
    if not isinstance(html, str):
        return ""
    return unescape(re.sub(r"<[^>]*>", "", html))


def matrix_to_telegram(html):
    """Convert Matrix HTML to ``(text, entities)`` for Telegram.

    Always returns a 2-tuple so callers can unpack the result. If parsing
    fails, the error is logged and the tag-stripped text is returned with an
    empty entity list.
    """
    try:
        parser = MatrixParser()
        parser.feed(html)
        # close() flushes text the parser buffers while waiting for a
        # possibly unfinished character reference (e.g. a trailing "AT&T").
        parser.close()
        return parser.text, parser.entities
    except Exception:
        log.exception("Failed to convert Matrix format:\nhtml=%s", html)
        return _plain_text_fallback(html), []


def telegram_to_matrix(text, entities):
    """Convert Telegram text and entities to Matrix HTML.

    Returns the result of ``_telegram_to_matrix``; on failure the error is
    logged and ``None`` is returned.
    """
    try:
        return _telegram_to_matrix(text, entities)
    except Exception:
        log.exception("Failed to convert Telegram format:\n"
                      "message=%s\n"
                      "entities=%s",
                      text, entities)
        return None
def handle_matrix_message(self, sender, message):
    """Relay a Matrix ``m.text`` message to this portal's Telegram peer.

    :param sender: object exposing ``send_message(peer, text, entities=...)``.
    :param message: Matrix message content dict; ``msgtype`` and ``body`` are
        read, plus ``format``/``formatted_body`` when present.

    HTML-formatted messages are converted to Telegram text and entities.
    If the formatted body is missing or the conversion yields nothing, the
    plain ``body`` is sent instead of raising. Other message types are ignored.
    """
    msgtype = message["msgtype"]
    if msgtype != "m.text":
        return

    if message.get("format") == "org.matrix.custom.html" and "formatted_body" in message:
        converted = formatter.matrix_to_telegram(message["formatted_body"])
        if converted is not None:
            text, entities = converted
            sender.send_message(self.peer, text, entities=entities)
            return

    sender.send_message(self.peer, message["body"])
def send_message(self, entity, message, reply_to=None, entities=None, link_preview=True):
    """Send ``message`` to ``entity`` through this user's client with explicit entities.

    Builds a ``SendMessageRequest`` directly so that pre-computed ``entities``
    can be attached. ``link_preview=False`` sets ``no_webpage`` on the request.

    Returns a ``Message`` assembled from the short update when the server
    answers with ``UpdateShortSentMessage``; otherwise returns whatever the
    client's ``_get_response_message`` extracts from the result.
    """
    client = self.client
    peer = client.get_input_entity(entity)
    reply_id = client._get_reply_to(reply_to)

    request = SendMessageRequest(
        peer=peer,
        message=message,
        entities=entities,
        no_webpage=not link_preview,
        reply_to_msg_id=reply_id,
    )
    result = client(request)

    if not isinstance(result, UpdateShortSentMessage):
        return client._get_response_message(request, result)

    # The short update omits the text and the peer, so fill them in from the request.
    return Message(
        id=result.id,
        to_id=peer,
        message=message,
        date=result.date,
        out=result.out,
        media=result.media,
        entities=result.entities,
    )