diff --git a/CHANGELOG.md b/CHANGELOG.md index 0faf0b89..aa95f159 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Minimum Conduit version remains at 0.4.0. ### Added * Added provisioning API for resolving Telegram identifiers (like usernames). +* Added basic bridging of Telegram custom emojis to Matrix. * Added option to not bridge chats with lots of members. * Added option to include captions in the same message as the media to implement [MSC2530]. Sending captions the same way is also supported and diff --git a/ROADMAP.md b/ROADMAP.md index 8ea37c42..1bf6ba27 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -24,6 +24,7 @@ * Telegram → Matrix * [x] Message content (text, formatting, files, etc..) * [ ] Advanced message content/media + * [x] Custom emojis * [x] Polls * [x] Games * [ ] Buttons diff --git a/mautrix_telegram/db/telegram_file.py b/mautrix_telegram/db/telegram_file.py index 5e72e357..1e48fb22 100644 --- a/mautrix_telegram/db/telegram_file.py +++ b/mautrix_telegram/db/telegram_file.py @@ -17,10 +17,11 @@ from __future__ import annotations from typing import TYPE_CHECKING, ClassVar +from asyncpg import Record from attr import dataclass from mautrix.types import ContentURI, EncryptedFile -from mautrix.util.async_db import Database +from mautrix.util.async_db import Database, Scheme fake_db = Database.create("") if TYPE_CHECKING else None @@ -40,28 +41,47 @@ class TelegramFile: decryption_info: EncryptedFile | None thumbnail: TelegramFile | None = None + columns: ClassVar[str] = ( + "id, mxc, mime_type, was_converted, timestamp, size, width, height, thumbnail, " + "decryption_info" + ) + @classmethod - async def get(cls, loc_id: str, *, _thumbnail: bool = False) -> TelegramFile | None: - q = ( - "SELECT id, mxc, mime_type, was_converted, timestamp, size, width, height, thumbnail," - " decryption_info " - "FROM telegram_file WHERE id=$1" - ) - row = await cls.db.fetchrow(q, loc_id) + def _from_row(cls, row: Record | None) -> TelegramFile | None: if 
@classmethod
async def get_many(cls, loc_ids: list[str]) -> list[TelegramFile]:
    """
    Fetch all stored files whose ID is in ``loc_ids`` using a single query.

    Every returned row is converted with ``cls._from_row``. This method does
    not perform any follow-up lookup for thumbnails. IDs without a matching
    row are simply missing from the result, so the returned list may be
    shorter than ``loc_ids``.

    Args:
        loc_ids: The ``telegram_file.id`` values to look up.

    Returns:
        The files that were found, in whatever order the database returned
        the rows. An empty input yields an empty list without touching the
        database.
    """
    if not loc_ids:
        # Nothing to look up: skip the round-trip (and the empty ANY/IN list).
        return []
    if cls.db.scheme in (Scheme.POSTGRES, Scheme.COCKROACH):
        # These schemes take the whole ID list as one array parameter.
        q = f"SELECT {cls.columns} FROM telegram_file WHERE id=ANY($1)"
        rows = await cls.db.fetch(q, loc_ids)
    else:
        # Any other scheme gets one positional placeholder per ID.
        tgid_placeholders = ",".join("?" * len(loc_ids))
        q = f"SELECT {cls.columns} FROM telegram_file WHERE id IN ({tgid_placeholders})"
        rows = await cls.db.fetch(q, *loc_ids)
    return [cls._from_row(row) for row in rows]
def within_surrogate(text: str, index: int) -> bool:
    """
    `True` if ``index`` is within a surrogate (before and after it, not at!).

    ``text`` is expected to contain UTF-16 style surrogate pairs. ``index``
    is "within" a pair when the character just before it is a high surrogate
    and the character at it is the matching low surrogate, i.e. cutting the
    string at ``index`` would split the pair in half.

    Args:
        text: The string to inspect.
        index: The position of the prospective cut.

    Returns:
        Whether cutting ``text`` at ``index`` would split a surrogate pair.
        Positions at or beyond either end of the string are never within a
        pair.
    """
    return (
        # In bounds: a character is needed on both sides of the cut. Index 1 is
        # valid here, it is the middle of a pair that starts the string.
        0 < index < len(text)
        and "\ud800" <= text[index - 1] <= "\udbff"  # previous is high surrogate
        and "\udc00" <= text[index] <= "\udfff"  # current is low surrogate
    )
async def transfer_custom_emojis_to_matrix(
    source: au.AbstractUser, emoji_ids: list[int]
) -> dict[int, DBTelegramFile]:
    """
    Make sure the given Telegram custom emojis are available as Matrix media.

    Emojis that already have a row in the file table (looked up by the
    stringified document ID) are reused. The remaining ones are requested
    with ``GetCustomEmojiDocumentsRequest`` through ``source.client`` and
    passed to ``transfer_file_to_matrix`` concurrently.

    Args:
        source: The user whose Telegram client and bridge config are used.
        emoji_ids: Custom emoji document IDs; duplicates are allowed.

    Returns:
        A mapping from document ID to its stored file. IDs that were neither
        in the database nor in the response to the document request are
        absent from the mapping, so callers should not assume every requested
        ID is present.
    """
    if not emoji_ids:
        # Most messages contain no custom emojis: skip the database lookup.
        return {}
    wanted_ids = set(emoji_ids)
    existing = await DBTelegramFile.get_many([str(emoji_id) for emoji_id in wanted_ids])
    file_map = {int(file.id): file for file in existing}
    not_existing_ids = list(wanted_ids - file_map.keys())
    if not_existing_ids:
        log.debug(f"Transferring custom emojis through {source.mxid}: {not_existing_ids}")

        documents: list[Document] = await source.client(
            GetCustomEmojiDocumentsRequest(document_id=not_existing_ids)
        )

        async def transfer(document: Document) -> None:
            # NOTE(review): the result is stored as-is; if transfer_file_to_matrix
            # can return None on failure, consumers of the map need to handle
            # that -- confirm against its implementation.
            file_map[document.id] = await transfer_file_to_matrix(
                source.client,
                source.bridge.az.intent,
                document,
                is_sticker=True,
                tgs_convert={"target": "png", "args": {"width": 256, "height": 256}},
                filename=f"emoji-{document.id}",
                # Emojis are used as inline images and can't be encrypted
                encrypt=False,
                async_upload=source.config["homeserver.async_media"],
            )

        # Each task writes a distinct key of file_map, so they can run concurrently.
        await asyncio.gather(*(transfer(doc) for doc in documents))
    return file_map