Allow reacting with any unicode emoji using custom pack

2022-10-01 17:17:27 +03:00
parent 81aa0084e7
commit 025b864bd8
14 changed files with 509 additions and 14 deletions
@@ -14,5 +14,8 @@ __pycache__
 /registration.yaml
 *.log*
 *.db
-*.pickle
+/*.pickle
 *.bak
+/*.session
+/*.session-journal
+/*.json
@@ -136,6 +136,7 @@ class Config(BaseBridgeConfig):
        copy("bridge.image_as_file_pixels")
        copy("bridge.parallel_file_transfer")
        copy("bridge.federate_rooms")
+        copy("bridge.always_custom_emoji_reaction")
        copy("bridge.animated_sticker.target")
        copy("bridge.animated_sticker.convert_from_webm")
        copy("bridge.animated_sticker.args.width")
@@ -224,6 +224,9 @@ bridge:
    # Whether or not created rooms should have federation enabled.
    # If false, created portal rooms will never be federated.
    federate_rooms: true
+    # Should the bridge send all unicode reactions as custom emoji reactions to Telegram?
+    # By default, the bridge only uses custom emojis for unicode emojis that aren't allowed in reactions.
+    always_custom_emoji_reaction: false
    # Settings for converting animated stickers.
    animated_sticker:
        # Format to which animated stickers should be converted.
@@ -54,7 +54,7 @@ from mautrix.types import Format, MessageType, TextMessageEventContent
 from .. import abstract_user as au, portal as po, puppet as pu, user as u
 from ..db import Message as DBMessage, TelegramFile as DBTelegramFile
 from ..types import TelegramID
-from ..util.file_transfer import transfer_custom_emojis_to_matrix
+from ..util.file_transfer import UnicodeCustomEmoji, transfer_custom_emojis_to_matrix

 log: logging.Logger = logging.getLogger("mau.fmt.tg")

@@ -279,10 +279,14 @@ async def _telegram_entities_to_matrix(
        elif entity_type == MessageEntityCustomEmoji:
            html.append(entity_text)
        elif entity_type == ReuploadedCustomEmoji:
-            html.append(
-                f'<img data-mx-emoticon data-mau-animated-emoji src="{escape(entity.file.mxc)}" '
-                f'height="32" width="32" alt="{entity_text}" title="{entity_text}"/>'
-            )
+            if isinstance(entity.file, UnicodeCustomEmoji):
+                html.append(entity.file.emoji)
+            else:
+                html.append(
+                    f"<img data-mx-emoticon data-mau-animated-emoji"
+                    f' src="{escape(entity.file.mxc)}" height="32" width="32"'
+                    f' alt="{entity_text}" title="{entity_text}"/>'
+                )
        elif entity_type in (
            MessageEntityBotCommand,
            MessageEntityHashtag,
@@ -2204,6 +2204,7 @@ class Portal(DBPortal, BasePortal):
            raise IgnoredMessageError(
                f"Ignoring Matrix redaction of reaction to unknown event {reaction.msg_mxid}"
            )
+        # TODO keep other reactions for premium users with multiple reactions
        async with self.reaction_lock(reaction_target.mxid):
            await reaction.delete()
            await deleter.client(SendReactionRequest(peer=self.peer, msg_id=reaction_target.tgid))
@@ -2250,6 +2251,21 @@ class Portal(DBPortal, BasePortal):
                return
            reaction = ReactionCustomEmoji(document_id=int(db_reaction.id))
            emoji_id = db_reaction.id
+        elif (
+            self.config["bridge.always_custom_emoji_reaction"]
+            or reaction.emoticon not in await user.get_available_reactions()
+        ):
+            try:
+                doc_id = util.unicode_custom_emoji_map[reaction.emoticon]
+            except KeyError:
+                pass
+            else:
+                self.log.trace(
+                    f"Using custom reaction {doc_id} instead of unicode {reaction.emoticon} "
+                    f"for {user.mxid}'s reaction"
+                )
+                reaction = ReactionCustomEmoji(document_id=doc_id)
+                emoji_id = str(doc_id)
        try:
            async with self.reaction_lock(target_event_id):
                await self._handle_matrix_reaction(
@@ -2303,6 +2319,7 @@ class Portal(DBPortal, BasePortal):
        existing_reacts = await DBReaction.get_by_sender(msg.mxid, msg.mx_room, user.tgid)
        new_tg_reactions: list[TypeReaction] = []
        reactions_to_remove: list[DBReaction] = []
+        # TODO use config https://corefork.telegram.org/api/config#reactions-user-max-default
        max_reactions = 3 if user.is_premium else 1
        max_reactions -= 1  # Leave one reaction of space for the new reaction
        for db_reaction in existing_reacts:
@@ -2847,6 +2864,7 @@ class Portal(DBPortal, BasePortal):
    @staticmethod
    async def _get_reaction_limit(sender: TelegramID) -> int:
        puppet = await p.Puppet.get_by_tgid(sender, create=False)
+        # TODO use config https://corefork.telegram.org/api/config#reactions-user-max-default
        if puppet and puppet.is_premium:
            return 3
        return 1
@@ -2898,7 +2916,11 @@ class Portal(DBPortal, BasePortal):
                    matrix_reaction = variation_selector.add(new_reaction.emoticon)
                elif isinstance(new_reaction, ReactionCustomEmoji):
                    emoji_id = str(new_reaction.document_id)
-                    matrix_reaction = custom_emojis[new_reaction.document_id].mxc
+                    custom_emoji = custom_emojis[new_reaction.document_id]
+                    if isinstance(custom_emoji, util.UnicodeCustomEmoji):
+                        matrix_reaction = custom_emoji.emoji
+                    else:
+                        matrix_reaction = custom_emoji.mxc
                else:
                    self.log.warning("Unknown reaction type %s", type(new_reaction))
                    continue
@@ -0,0 +1,397 @@
+# mautrix-telegram - A Matrix-Telegram puppeting bridge
+# Copyright (C) 2022 Tulir Asokan
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from typing import Any, Literal, TypedDict
+from pathlib import Path
+import argparse
+import asyncio
+import io
+import json
+import logging
+import math
+import mimetypes
+import pickle
+import random
+import string
+
+from lottie.exporters import export_tgs
+from lottie.exporters.cairo import export_png
+from lottie.exporters.tgs_validator import Severity, TgsValidator
+from lottie.importers.svg import import_svg
+from lottie.objects import Animation
+from lottie.utils.stripper import float_strip
+from PIL import Image
+from telethon import TelegramClient
+from telethon.custom import Conversation, Message
+from telethon.tl.functions.messages import GetStickerSetRequest
+from telethon.tl.types import (
+    Document,
+    DocumentAttributeCustomEmoji,
+    DocumentAttributeFilename,
+    DocumentAttributeImageSize,
+    InputMediaUploadedDocument,
+    InputStickerSetShortName,
+)
+import aiohttp
+
+# Make sure the ".webp" extension maps to image/webp in the mimetypes registry.
+mimetypes.add_type("image/webp", ".webp")
+
+# Command-line interface of the packer script.
+# NOTE: the arguments are parsed at import time (see parse_args() below), so
+# importing this module without the required flags exits with a usage error.
+parser = argparse.ArgumentParser(description="mautrix-telegram unicode emoji packer")
+parser.add_argument(
+    "-i", "--api-id", type=int, required=True, metavar="<api id>", help="Telegram API ID"
+)
+parser.add_argument(
+    "-a", "--api-hash", type=str, required=True, metavar="<api hash>", help="Telegram API hash"
+)
+parser.add_argument(
+    "-s",
+    "--session",
+    type=str,
+    default="unicodemojipacker.session",
+    metavar="<file name>",
+    help="Telethon session name",
+)
+parser.add_argument(
+    "-o",
+    "--output",
+    type=str,
+    default="mautrix_telegram/unicodemojipack.json",
+    metavar="<file name>",
+    help="Path to save created emoji pack document IDs",
+)
+parser.add_argument(
+    "-f",
+    "--font-directory",
+    type=Path,
+    required=True,
+    metavar="<directory path>",
+    help="Path to the Noto color emoji files",
+)
+parser.add_argument(
+    "-m",
+    "--media-directory",
+    type=Path,
+    required=True,
+    metavar="<directory path>",
+    help="Path to save converted tgs and webp emoji files",
+)
+args = parser.parse_args()
+# Directory the source SVG files are read from (parse_emoji_data).
+font_dir: Path = args.font_directory
+# Directory that receives the converted emoji files and the JSON caches.
+media_dir: Path = args.media_directory
+
+# Emoji metadata list downloaded by load_emoji_data() when no cache exists.
+EMOJI_DATA_URL = "https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json"
+
+
+def unified_to_unicode(unified: str) -> str:
+    """Turn dash-separated hex code points into the string they spell.
+
+    For example ``"1F468-200D-1F469"`` becomes the three-character sequence
+    U+1F468, U+200D, U+1F469.
+    """
+    # One \UXXXXXXXX escape per code point (left-padded with zeros to eight
+    # hex digits), decoded in a single pass by the unicode_escape codec.
+    escapes = ["\\U" + chunk.rjust(8, "0") for chunk in unified.split("-")]
+    return "".join(escapes).encode("ascii").decode("unicode_escape")
+
+
+def tag_to_str(unified: str) -> str:
+    """Decode dash-separated hex code points after dropping a leading ``E00`` from each.
+
+    NOTE(review): presumably meant for Unicode tag characters (U+E0020..U+E007F),
+    whose low bits mirror ASCII -- confirm against the intended caller.
+    """
+    chars = []
+    for part in unified.split("-"):
+        code_point = int(part.removeprefix("E00"), 16)
+        chars.append(chr(code_point))
+    return "".join(chars)
+
+
+# File format an emoji was converted to; doubles as the output file extension.
+EmojiType = Literal["webp", "tgs"]
+# Pack type label; create_pack() sends this text to the bot when choosing the type.
+PackType = Literal["Animated emoji", "Static emoji"]
+
+
+class Emoji(TypedDict):
+    """One converted emoji, as produced by parse_emoji_data()."""
+
+    # Dash-separated hex code points with "-FE0F" removed.
+    hex: str
+    # The emoji itself as a unicode string.
+    emoji: str
+    # Format of the converted file.
+    type: EmojiType
+    # Name of the converted file inside the media directory.
+    filename: str
+
+
+class EmojiData(TypedDict):
+    """All converted emojis, grouped by output format (see load_emoji_data())."""
+
+    # Emojis that were exported as .tgs files.
+    tgs: list[Emoji]
+    # Emojis that had to be rasterised to .webp files.
+    webp: list[Emoji]
+
+
+def parse_emoji_data(tone: dict[str, Any], emoji: dict[str, Any]) -> Emoji:
+    """Convert the SVG of one emoji (or skin tone variation) and save the result.
+
+    The SVG is read from ``font_dir``, exported as TGS and validated. When the
+    validator reports anything above ``Severity.Note`` (or a region flag is
+    larger than 32768 bytes) the emoji is rendered to a 100x100 WebP instead.
+    The converted file is written to ``media_dir``.
+
+    :param tone: Emoji data entry of the exact variation being converted.
+    :param emoji: Emoji data entry of the base emoji (supplies the category).
+    :return: Metadata describing the converted file.
+    """
+    # Named hex_code rather than hex so the builtin is not shadowed.
+    hex_code = (tone["non_qualified"] or tone["unified"]).replace("-FE0F", "")
+    filename_hex = hex_code.replace("-", "_").lower()
+    if emoji["category"] == "Flags" and emoji["subcategory"] in (
+        "country-flag",
+        "subdivision-flag",
+    ):
+        # Country and subdivision flags live in a separate directory of the font repo.
+        filename = f"third_party/region-flags/waved-svg/emoji_u{filename_hex}.svg"
+    else:
+        filename = f"svg/emoji_u{filename_hex}.svg"
+
+    with (font_dir / filename).open() as f:
+        lot: Animation = import_svg(f)
+    float_strip(lot)
+    lot.tgs_sanitize()
+
+    output = io.BytesIO()
+    export_tgs(lot, output)
+    tgs_size = len(output.getvalue())
+
+    validator = TgsValidator()
+    validator(lot)
+    validator.check_size(tgs_size)
+    errors = [err for err in validator.errors if err.severity != Severity.Note]
+
+    media_type: EmojiType
+    if errors or ("region-flags" in filename and tgs_size > 32768):
+        # Fall back to a raster image: render to PNG, then re-encode as WebP.
+        lot.scale(100, 100)
+
+        png_out = io.BytesIO()
+        export_png(lot, png_out)
+        img = Image.open(png_out)
+        output = io.BytesIO()
+        output.name = "image.webp"
+        img.save(output, "webp")
+
+        media_type = "webp"
+    else:
+        media_type = "tgs"
+
+    path = media_dir / f"{filename_hex}.{media_type}"
+    with path.open("wb") as f:
+        f.write(output.getvalue())
+    print(
+        "Converted", filename, "->", path.name, "//" if errors else "", "\n".join(map(str, errors))
+    )
+
+    return {
+        "hex": hex_code,
+        "emoji": unified_to_unicode(tone["unified"]),
+        "type": media_type,
+        "filename": path.name,
+    }
+
+
+async def load_emoji_data() -> EmojiData:
+    """Return the converted emoji list, converting everything on the first run.
+
+    The result is cached in ``conversion-cache.json`` inside ``media_dir``. When
+    that file exists it is returned as-is. Otherwise the emoji data list is
+    downloaded, every emoji and skin tone variation is converted with
+    parse_emoji_data(), and the cache is written.
+
+    :raises aiohttp.ClientResponseError: If the download returns an HTTP error status.
+    """
+    cache_path = media_dir / "conversion-cache.json"
+    try:
+        # The cache contains raw emoji characters (ensure_ascii=False), so it is
+        # always read and written as UTF-8 rather than in the locale encoding.
+        with cache_path.open(encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        pass
+    async with aiohttp.ClientSession() as sess, sess.get(EMOJI_DATA_URL) as resp:
+        # Fail here instead of trying to parse an error page as JSON.
+        resp.raise_for_status()
+        raw_emoji_data = sorted(
+            # content_type=None disables aiohttp's Content-Type check for the response.
+            await resp.json(content_type=None),
+            key=lambda dat: dat["sort_order"],
+        )
+    full_data: EmojiData = {"tgs": [], "webp": []}
+    for emoji in raw_emoji_data:
+        # Convert the base emoji first, then each of its skin tone variations.
+        for tone in (emoji, *emoji.get("skin_variations", {}).values()):
+            parsed_emoji = parse_emoji_data(tone, emoji)
+            if parsed_emoji["type"] == "tgs":
+                full_data["tgs"].append(parsed_emoji)
+            else:
+                full_data["webp"].append(parsed_emoji)
+    with cache_path.open("w", encoding="utf-8") as f:
+        json.dump(full_data, f, ensure_ascii=False)
+    return full_data
+
+
+async def create_pack(conv: Conversation, name: str, pack_type: str) -> None:
+    """Walk through the /newemojipack dialogue up to the point where emojis can be sent.
+
+    :param conv: Open conversation in which the dialogue takes place.
+    :param name: Display name of the new pack.
+    :param pack_type: ``"Animated emoji"`` or ``"Static emoji"``.
+    :raises AssertionError: If a reply does not contain the expected text.
+    """
+
+    async def expect(*needles: str) -> None:
+        # Raised explicitly (not via `assert`) so the check survives `python -O`
+        # and the error shows what was actually received.
+        resp: Message = await conv.get_response()
+        for needle in needles:
+            if needle not in resp.raw_text:
+                raise AssertionError(
+                    f"Unexpected reply (expected {needle!r}): {resp.raw_text!r}"
+                )
+
+    animated = pack_type == "Animated emoji"
+
+    await conv.send_message("/newemojipack")
+    await expect("A new set of custom emoji", "Please choose the type")
+
+    await conv.send_message(pack_type)
+    await expect(
+        "When ready to upload, tell me the name of your set."
+        if animated
+        else "Now choose a name for your set."
+    )
+
+    await conv.send_message(name)
+    await expect(
+        "Now send me the first animated emoji" if animated else "Now send me the custom emoji"
+    )
+
+
+async def publish_pack(conv: Conversation, shortname: str) -> None:
+    """Walk through the /publish dialogue for the pack that is being edited.
+
+    The icon selection step is skipped with ``/skip``.
+
+    :param conv: Open conversation in which the dialogue takes place.
+    :param shortname: Short name to publish the pack under.
+    :raises AssertionError: If a reply does not contain the expected text.
+    """
+
+    async def expect(needle: str) -> None:
+        # Raised explicitly (not via `assert`) so the check survives `python -O`
+        # and the error shows what was actually received.
+        resp: Message = await conv.get_response()
+        if needle not in resp.raw_text:
+            raise AssertionError(f"Unexpected reply (expected {needle!r}): {resp.raw_text!r}")
+
+    await conv.send_message("/publish")
+    await expect("You can send me a custom emoji from your emoji set")
+
+    await conv.send_message("/skip")
+    await expect("Please provide a short name for your emoji set")
+
+    await conv.send_message(shortname)
+    await expect("I've just published your emoji set")
+
+
+async def send_emoji(
+    conv: Conversation, file: bytes | Path | InputMediaUploadedDocument, emoji: str
+) -> None:
+    """Upload one custom emoji file and tell the bot which unicode emoji it stands for.
+
+    If the bot reports too many attempts, the operator is asked to press enter
+    before the emoji is sent again. If the bot rejects the emoji, the operator
+    is asked for a replacement. Both cases are retried for as long as they occur.
+
+    :param conv: Open conversation in which the dialogue takes place.
+    :param file: The emoji media to upload.
+    :param emoji: Unicode emoji to associate with the uploaded file.
+    :raises AssertionError: If a reply does not contain the expected text.
+    """
+
+    def unexpected(reply: Message, needle: str) -> AssertionError:
+        # Built explicitly (not via `assert`) so the check survives `python -O`
+        # and the error shows what was actually received.
+        return AssertionError(f"Unexpected reply (expected {needle!r}): {reply.raw_text!r}")
+
+    await conv.send_file(file)
+    resp: Message = await conv.get_response()
+    prompt = "Send me a replacement emoji that corresponds to your custom emoji"
+    if prompt not in resp.raw_text:
+        raise unexpected(resp, prompt)
+
+    await conv.send_message(emoji)
+    resp = await conv.get_response()
+    while True:
+        if "Sorry, too many attempts" in resp.raw_text:
+            # Rate limited: wait for the operator, then resend the same emoji.
+            print(resp.raw_text)
+            input("Press enter to continue")
+        elif "Please send an emoji that best describes your custom emoji." in resp.raw_text:
+            emoji = input(f"{emoji} was rejected, provide replacement: ")
+        else:
+            break
+        await conv.send_message(emoji)
+        resp = await conv.get_response()
+    if "Congratulations" not in resp.raw_text:
+        raise unexpected(resp, "Congratulations")
+
+
+class CachedPack(TypedDict):
+    """Progress record of one emoji pack, persisted in split-cache.json by main()."""
+
+    # Display name, "mautrix-telegram unicodemoji (<part>/<total>)".
+    name: str
+    # Short name with a random suffix; used to publish and to look up the pack.
+    short_name: str
+    # 1-based index of this pack across all packs.
+    part: int
+    # Whether the pack holds animated (tgs) or static (webp) emojis.
+    type: PackType
+    # Set once the pack has been created, filled and published.
+    published: bool
+    # Set once the document IDs of the pack have been recorded.
+    collected: bool
+    # The emojis in this pack, in upload order.
+    emojis: list[Emoji]
+
+
+class CachedData(TypedDict):
+    """Wrapper holding a list of packs.
+
+    NOTE(review): nothing visible in this file uses this type; main() stores the
+    bare list of packs instead -- confirm whether it is still needed.
+    """
+
+    packs: list[CachedPack]
+
+
+def _split_packs_int(
+    emoji_list: list[Emoji],
+    pack_type: PackType,
+    current_part: int,
+    total_parts: int,
+    pack_size: int = 200,
+) -> tuple[list[CachedPack], int]:
+    """Split a list of emojis into consecutive packs of at most ``pack_size`` emojis.
+
+    :param emoji_list: Emojis to distribute, in order.
+    :param pack_type: Type recorded on every created pack.
+    :param current_part: Number of parts that already exist; numbering continues after it.
+    :param total_parts: Total part count, only used in the pack display name.
+    :param pack_size: Maximum number of emojis per pack.
+    :return: The created packs and the part number of the last one
+        (``current_part`` unchanged when ``emoji_list`` is empty).
+    """
+    packs: list[CachedPack] = []
+    for start in range(0, len(emoji_list), pack_size):
+        current_part += 1
+        # Random suffix so that the short name is unlikely to be taken already.
+        random_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+        packs.append(
+            {
+                "type": pack_type,
+                "short_name": f"mxtg_unicodemoji_{random_id}",
+                "part": current_part,
+                "name": f"mautrix-telegram unicodemoji ({current_part}/{total_parts})",
+                "published": False,
+                "collected": False,
+                "emojis": emoji_list[start : start + pack_size],
+            }
+        )
+    return packs, current_part
+
+
+def split_packs(emoji_data: EmojiData) -> list[CachedPack]:
+    """Distribute all emojis into packs: animated (tgs) packs first, then static (webp)."""
+    animated, static = emoji_data["tgs"], emoji_data["webp"]
+    # Each format is split separately, so the part counts are rounded up per format.
+    total_parts = math.ceil(len(animated) / 200) + math.ceil(len(static) / 200)
+    animated_packs, parts_so_far = _split_packs_int(animated, "Animated emoji", 0, total_parts)
+    static_packs, _ = _split_packs_int(static, "Static emoji", parts_so_far, total_parts)
+    return [*animated_packs, *static_packs]
+
+
+async def create_and_fill_pack(
+    client: TelegramClient,
+    conv: Conversation,
+    pack: CachedPack,
+    *,
+    resume_short_name: str | None = "mxtg_unicodemoji_xvzs6743",
+    resume_index: int = 87,
+) -> None:
+    """Create a pack, upload all of its emojis and publish it.
+
+    :param client: Client used to upload webp files.
+    :param conv: Open conversation in which the pack dialogue takes place.
+    :param pack: The pack to create; emoji files are read from ``media_dir``.
+    :param resume_short_name: Short name of a pack whose creation was interrupted.
+        For that pack the creation dialogue is skipped and uploading continues at
+        ``resume_index``. ``None`` disables resuming.
+    :param resume_index: Index of the first emoji to upload when resuming.
+
+    NOTE(review): the defaults reproduce resume values that used to be hard-coded
+    in the body; they look like leftovers of one specific run and should probably
+    become ``None`` / ``0`` once that is confirmed.
+    """
+    resuming = resume_short_name is not None and pack["short_name"] == resume_short_name
+    first_index = resume_index if resuming else 0
+    if resuming:
+        print("Continuing pack", pack["name"])
+    else:
+        print("Creating pack", pack["name"])
+        await create_pack(conv, pack["name"], pack["type"])
+    total = len(pack["emojis"])
+    for i, emoji in enumerate(pack["emojis"]):
+        if i < first_index:
+            continue
+        print(f"Adding emoji {i+1}/{total}", emoji["hex"], emoji["emoji"])
+        emoji_file = media_dir / emoji["filename"]
+        if emoji["type"] == "webp":
+            # Static emojis are uploaded by hand so that explicit document
+            # attributes can be attached; tgs files are sent as plain paths.
+            attrs = [
+                DocumentAttributeImageSize(w=100, h=100),
+                DocumentAttributeFilename(file_name="image.webp"),
+            ]
+            with emoji_file.open("rb") as f:
+                file_handle = await client.upload_file(f, file_name="emoji.webp")
+            emoji_file = InputMediaUploadedDocument(
+                file_handle, mime_type="image/webp", attributes=attrs
+            )
+        await send_emoji(conv, emoji_file, emoji["emoji"])
+        # Pause between uploads.
+        await asyncio.sleep(2)
+    print("Publishing pack", pack["short_name"])
+    await publish_pack(conv, pack["short_name"])
+
+
+async def main():
+    """Convert all emojis, upload them as custom emoji packs and record their document IDs.
+
+    Progress is persisted in ``split-cache.json`` (inside ``media_dir``) and in the
+    output file after every pack, so an interrupted run can be restarted. At the
+    end the emoji -> document ID map is also written as a pickle next to the
+    JSON output.
+    """
+    logging.basicConfig(level=logging.INFO)
+
+    def save_json(path: Path, data: Any) -> None:
+        # Emojis are stored unescaped, so always write UTF-8 instead of the
+        # locale encoding.
+        with path.open("w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False)
+
+    emoji_data = await load_emoji_data()
+
+    split_cache = media_dir / "split-cache.json"
+    try:
+        with split_cache.open(encoding="utf-8") as f:
+            packs: list[CachedPack] = json.load(f)
+    except FileNotFoundError:
+        packs = split_packs(emoji_data)
+        save_json(split_cache, packs)
+
+    doc_id_file = Path(args.output)
+    try:
+        with doc_id_file.open(encoding="utf-8") as f:
+            doc_ids = json.load(f)
+    except FileNotFoundError:
+        doc_ids = {}
+
+    client = TelegramClient(args.session, args.api_id, args.api_hash, flood_sleep_threshold=3600)
+    # The context manager starts the client and disconnects it again on exit,
+    # including when an error aborts the run.
+    async with client:
+        async with client.conversation("Stickers", max_messages=20000) as conv:
+            for pack in packs:
+                if not pack["published"]:
+                    await create_and_fill_pack(client, conv, pack)
+                    pack["published"] = True
+                    save_json(split_cache, packs)
+                # Collect again when the flag is set but the output file lacks
+                # entries (e.g. it was deleted); otherwise skip finished packs.
+                needs_collect = not pack["collected"] or any(
+                    item["emoji"].replace("\ufe0f", "") not in doc_ids
+                    for item in pack["emojis"]
+                )
+                if not needs_collect:
+                    continue
+                print("Collecting document IDs from pack", pack["short_name"])
+                stickers = await client(
+                    GetStickerSetRequest(InputStickerSetShortName(pack["short_name"]), 0)
+                )
+                if len(stickers.documents) != len(pack["emojis"]):
+                    print(
+                        f"Warning: pack has {len(stickers.documents)} documents, "
+                        f"expected {len(pack['emojis'])}"
+                    )
+                doc: Document
+                # Documents are matched to emojis by position in the pack.
+                for doc, item in zip(stickers.documents, pack["emojis"]):
+                    # A default avoids StopIteration escaping from the coroutine.
+                    attr = next(
+                        (
+                            attr
+                            for attr in doc.attributes
+                            if isinstance(attr, DocumentAttributeCustomEmoji)
+                        ),
+                        None,
+                    )
+                    base_emoji = attr.alt.replace("\ufe0f", "") if attr else ""
+                    emoji = item["emoji"].replace("\ufe0f", "")
+                    doc_ids[emoji] = doc.id
+                    print(f"Mapped {emoji} (fallback: {base_emoji}) -> {doc_ids[emoji]}")
+                pack["collected"] = True
+                save_json(split_cache, packs)
+                save_json(doc_id_file, doc_ids)
+                print("Pack completed")
+                await asyncio.sleep(5)
+    # with_suffix() always yields a different path than the JSON output, whereas
+    # a textual ".json" replacement would overwrite it for other file names.
+    with doc_id_file.with_suffix(".pickle").open("wb") as f:
+        pickle.dump(doc_ids, f)
+    print("Wrote pickle")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -18,15 +18,16 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Awaitable, NamedTuple, cast
 from datetime import datetime, timezone
 import asyncio
+import time

 from telethon.errors import AuthKeyDuplicatedError, RPCError, UnauthorizedError
 from telethon.tl.custom import Dialog
 from telethon.tl.functions.account import UpdateStatusRequest
 from telethon.tl.functions.contacts import GetContactsRequest, SearchRequest
+from telethon.tl.functions.messages import GetAvailableReactionsRequest
 from telethon.tl.functions.updates import GetStateRequest
 from telethon.tl.functions.users import GetUsersRequest
 from telethon.tl.types import (
-    Channel,
    Chat,
    ChatForbidden,
    InputUserSelf,
@@ -43,6 +44,7 @@ from telethon.tl.types import (
    User as TLUser,
 )
 from telethon.tl.types.contacts import ContactsNotModified
+from telethon.tl.types.messages import AvailableReactions

 from mautrix.appservice import DOUBLE_PUPPET_SOURCE_KEY
 from mautrix.bridge import BaseUser, async_getter_lock
@@ -85,6 +87,11 @@ class User(DBUser, AbstractUser, BaseUser):
    _track_connection_task: asyncio.Task | None
    _is_backfilling: bool

+    _available_emoji_reactions: set[str] | None
+    _available_emoji_reactions_hash: int | None
+    _available_emoji_reactions_fetched: float
+    _available_emoji_reactions_lock: asyncio.Lock
+
    def __init__(
        self,
        mxid: UserID,
@@ -110,6 +117,10 @@ class User(DBUser, AbstractUser, BaseUser):
        self._track_connection_task = None
        self._is_backfilling = False
        self._portals_cache = None
+        self._available_emoji_reactions = None
+        self._available_emoji_reactions_hash = None
+        self._available_emoji_reactions_fetched = 0
+        self._available_emoji_reactions_lock = asyncio.Lock()

        (
            self.relaybot_whitelisted,
@@ -700,6 +711,29 @@ class User(DBUser, AbstractUser, BaseUser):
        self.log.debug("Contact syncing complete")
        return contacts

+    async def get_available_reactions(self) -> set[str]:
+        """Return the emoji reactions this user may send, cached for 12 hours.
+
+        The list is fetched with ``GetAvailableReactionsRequest``; reactions
+        marked premium are left out unless the user is premium. Concurrent
+        callers share one fetch through a lock.
+
+        :return: The set of allowed reaction emojis. An empty set is returned in
+            the unexpected case that no list could be obtained at all.
+        """
+        max_age = 12 * 60 * 60
+
+        def cache_is_fresh() -> bool:
+            # The "is not None" check is essential: the fetch timestamp starts at
+            # 0 and the origin of time.monotonic() is undefined, so on a clock
+            # that reads less than max_age the bare time comparison would report
+            # a never-filled cache as fresh.
+            return (
+                self._available_emoji_reactions is not None
+                and self._available_emoji_reactions_fetched + max_age > time.monotonic()
+            )
+
+        if cache_is_fresh():
+            return self._available_emoji_reactions
+        async with self._available_emoji_reactions_lock:
+            # Check again: another task may have refreshed the cache while this
+            # one was waiting for the lock.
+            if cache_is_fresh():
+                return self._available_emoji_reactions
+            self.log.debug("Fetching available emoji reactions")
+            available_reactions = await self.client(
+                GetAvailableReactionsRequest(hash=self._available_emoji_reactions_hash or 0)
+            )
+            if isinstance(available_reactions, AvailableReactions):
+                self._available_emoji_reactions = {
+                    react.reaction
+                    for react in available_reactions.reactions
+                    if self.is_premium or not react.premium
+                }
+                self._available_emoji_reactions_hash = available_reactions.hash
+                self._available_emoji_reactions_fetched = time.monotonic()
+                self.log.debug(
+                    "Got available emoji reactions: %s", self._available_emoji_reactions
+                )
+            elif self._available_emoji_reactions is not None:
+                # Any other reply (presumably "not modified" for the sent hash)
+                # leaves the cached set in place; restart the timer so that the
+                # request is not repeated on every call.
+                self._available_emoji_reactions_fetched = time.monotonic()
+            return self._available_emoji_reactions or set()
+
    # endregion
    # region Class instance lookup

@@ -1,4 +1,10 @@
 from .color_log import ColorFormatter
-from .file_transfer import convert_image, transfer_custom_emojis_to_matrix, transfer_file_to_matrix
+from .file_transfer import (
+    UnicodeCustomEmoji,
+    convert_image,
+    transfer_custom_emojis_to_matrix,
+    transfer_file_to_matrix,
+    unicode_custom_emoji_map,
+)
 from .parallel_file_transfer import parallel_transfer_to_telegram
 from .recursive_dict import recursive_del, recursive_get, recursive_set
@@ -15,11 +15,13 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 from __future__ import annotations

-from typing import Optional, Union
+from typing import NamedTuple, Optional, Union
 from io import BytesIO
 from sqlite3 import IntegrityError
 import asyncio
 import logging
+import pickle
+import pkgutil
 import tempfile
 import time

@@ -44,7 +46,7 @@ from telethon.tl.types import (
 )

 from mautrix.appservice import IntentAPI
-from mautrix.util import magic
+from mautrix.util import magic, variation_selector

 from .. import abstract_user as au
 from ..db import TelegramFile as DBTelegramFile
@@ -212,15 +214,37 @@ async def transfer_thumbnail_to_matrix(

 transfer_locks: dict[str, asyncio.Lock] = {}

+# Unicode emoji -> Telegram custom emoji document ID, unpickled at import time
+# from the unicodemojipack.pickle data file inside the mautrix_telegram package.
+# NOTE(review): pkgutil.get_data() may return None when the resource cannot be
+# loaded, which would make pickle.loads() raise TypeError here. pickle must only
+# ever be fed this packaged file, never data from an external source.
+unicode_custom_emoji_map = pickle.loads(
+    pkgutil.get_data("mautrix_telegram", "unicodemojipack.pickle")
+)
+# Inverse lookup: custom emoji document ID -> unicode emoji.
+reverse_unicode_custom_emoji_map = {
+    doc_id: emoji for emoji, doc_id in unicode_custom_emoji_map.items()
+}
+
 TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]]


+class UnicodeCustomEmoji(NamedTuple):
+    """A custom emoji document that stands for a plain unicode emoji.
+
+    transfer_custom_emojis_to_matrix() returns this in place of a file record for
+    document IDs found in reverse_unicode_custom_emoji_map.
+    """
+
+    # The unicode emoji, after variation_selector.add() has been applied.
+    emoji: str
+
+
 async def transfer_custom_emojis_to_matrix(
    source: au.AbstractUser, emoji_ids: list[int]
-) -> dict[int, DBTelegramFile]:
+) -> dict[int, DBTelegramFile | UnicodeCustomEmoji]:
    emoji_ids = set(emoji_ids)
+    existing_unicode = {}
+    for emoji_id in emoji_ids:
+        try:
+            existing_unicode[emoji_id] = UnicodeCustomEmoji(
+                variation_selector.add(reverse_unicode_custom_emoji_map[emoji_id])
+            )
+        except KeyError:
+            pass
+    emoji_ids -= existing_unicode.keys()
+    if not emoji_ids:
+        return existing_unicode
    existing = await DBTelegramFile.get_many([str(id) for id in emoji_ids])
-    file_map = {int(file.id): file for file in existing}
+    file_map = {int(file.id): file for file in existing} | existing_unicode
    not_existing_ids = list(emoji_ids - file_map.keys())
    if not_existing_ids:
        log.debug(f"Transferring custom emojis through {source.mxid}: {not_existing_ids}")
@@ -66,7 +66,7 @@ setuptools.setup(
    ],
    package_data={"mautrix_telegram": [
        "web/public/*.mako", "web/public/*.png", "web/public/*.css",
-        "example-config.yaml",
+        "example-config.yaml", "unicodemojipack.pickle",
    ]},
    data_files=[
        (".", ["mautrix_telegram/example-config.yaml"]),