Allow reacting with any unicode emoji using custom pack

This commit is contained in:
Tulir Asokan
2022-10-01 17:17:27 +03:00
parent 81aa0084e7
commit 025b864bd8
14 changed files with 509 additions and 14 deletions
+4 -1
View File
@@ -14,5 +14,8 @@ __pycache__
/registration.yaml
*.log*
*.db
*.pickle
/*.pickle
*.bak
/*.session
/*.session-journal
/*.json
+1
View File
@@ -136,6 +136,7 @@ class Config(BaseBridgeConfig):
copy("bridge.image_as_file_pixels")
copy("bridge.parallel_file_transfer")
copy("bridge.federate_rooms")
copy("bridge.always_custom_emoji_reaction")
copy("bridge.animated_sticker.target")
copy("bridge.animated_sticker.convert_from_webm")
copy("bridge.animated_sticker.args.width")
+3
View File
@@ -224,6 +224,9 @@ bridge:
# Whether or not created rooms should have federation enabled.
# If false, created portal rooms will never be federated.
federate_rooms: true
# Should the bridge send all unicode reactions as custom emoji reactions to Telegram?
# By default, the bridge only uses custom emojis for unicode emojis that aren't allowed in reactions.
always_custom_emoji_reaction: false
# Settings for converting animated stickers.
animated_sticker:
# Format to which animated stickers should be converted.
+9 -5
View File
@@ -54,7 +54,7 @@ from mautrix.types import Format, MessageType, TextMessageEventContent
from .. import abstract_user as au, portal as po, puppet as pu, user as u
from ..db import Message as DBMessage, TelegramFile as DBTelegramFile
from ..types import TelegramID
from ..util.file_transfer import transfer_custom_emojis_to_matrix
from ..util.file_transfer import UnicodeCustomEmoji, transfer_custom_emojis_to_matrix
log: logging.Logger = logging.getLogger("mau.fmt.tg")
@@ -279,10 +279,14 @@ async def _telegram_entities_to_matrix(
elif entity_type == MessageEntityCustomEmoji:
html.append(entity_text)
elif entity_type == ReuploadedCustomEmoji:
html.append(
f'<img data-mx-emoticon data-mau-animated-emoji src="{escape(entity.file.mxc)}" '
f'height="32" width="32" alt="{entity_text}" title="{entity_text}"/>'
)
if isinstance(entity.file, UnicodeCustomEmoji):
html.append(entity.file.emoji)
else:
html.append(
f"<img data-mx-emoticon data-mau-animated-emoji"
f' src="{escape(entity.file.mxc)}" height="32" width="32"'
f' alt="{entity_text}" title="{entity_text}"/>'
)
elif entity_type in (
MessageEntityBotCommand,
MessageEntityHashtag,
+23 -1
View File
@@ -2204,6 +2204,7 @@ class Portal(DBPortal, BasePortal):
raise IgnoredMessageError(
f"Ignoring Matrix redaction of reaction to unknown event {reaction.msg_mxid}"
)
# TODO keep other reactions for premium users with multiple reactions
async with self.reaction_lock(reaction_target.mxid):
await reaction.delete()
await deleter.client(SendReactionRequest(peer=self.peer, msg_id=reaction_target.tgid))
@@ -2250,6 +2251,21 @@ class Portal(DBPortal, BasePortal):
return
reaction = ReactionCustomEmoji(document_id=int(db_reaction.id))
emoji_id = db_reaction.id
elif (
self.config["bridge.always_custom_emoji_reaction"]
or reaction.emoticon not in await user.get_available_reactions()
):
try:
doc_id = util.unicode_custom_emoji_map[reaction.emoticon]
except KeyError:
pass
else:
self.log.trace(
f"Using custom reaction {doc_id} instead of unicode {reaction.emoticon} "
f"for {user.mxid}'s reaction"
)
reaction = ReactionCustomEmoji(document_id=doc_id)
emoji_id = str(doc_id)
try:
async with self.reaction_lock(target_event_id):
await self._handle_matrix_reaction(
@@ -2303,6 +2319,7 @@ class Portal(DBPortal, BasePortal):
existing_reacts = await DBReaction.get_by_sender(msg.mxid, msg.mx_room, user.tgid)
new_tg_reactions: list[TypeReaction] = []
reactions_to_remove: list[DBReaction] = []
# TODO use config https://corefork.telegram.org/api/config#reactions-user-max-default
max_reactions = 3 if user.is_premium else 1
max_reactions -= 1 # Leave one reaction of space for the new reaction
for db_reaction in existing_reacts:
@@ -2847,6 +2864,7 @@ class Portal(DBPortal, BasePortal):
@staticmethod
async def _get_reaction_limit(sender: TelegramID) -> int:
puppet = await p.Puppet.get_by_tgid(sender, create=False)
# TODO use config https://corefork.telegram.org/api/config#reactions-user-max-default
if puppet and puppet.is_premium:
return 3
return 1
@@ -2898,7 +2916,11 @@ class Portal(DBPortal, BasePortal):
matrix_reaction = variation_selector.add(new_reaction.emoticon)
elif isinstance(new_reaction, ReactionCustomEmoji):
emoji_id = str(new_reaction.document_id)
matrix_reaction = custom_emojis[new_reaction.document_id].mxc
custom_emoji = custom_emojis[new_reaction.document_id]
if isinstance(custom_emoji, util.UnicodeCustomEmoji):
matrix_reaction = custom_emoji.emoji
else:
matrix_reaction = custom_emoji.mxc
else:
self.log.warning("Unknown reaction type %s", type(new_reaction))
continue
@@ -0,0 +1,397 @@
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2022 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Any, Literal, TypedDict
from pathlib import Path
import argparse
import asyncio
import io
import json
import logging
import math
import mimetypes
import pickle
import random
import string
from lottie.exporters import export_tgs
from lottie.exporters.cairo import export_png
from lottie.exporters.tgs_validator import Severity, TgsValidator
from lottie.importers.svg import import_svg
from lottie.objects import Animation
from lottie.utils.stripper import float_strip
from PIL import Image
from telethon import TelegramClient
from telethon.custom import Conversation, Message
from telethon.tl.functions.messages import GetStickerSetRequest
from telethon.tl.types import (
Document,
DocumentAttributeCustomEmoji,
DocumentAttributeFilename,
DocumentAttributeImageSize,
InputMediaUploadedDocument,
InputStickerSetShortName,
)
import aiohttp
# Make sure the ".webp" extension maps to the image/webp MIME type.
mimetypes.add_type("image/webp", ".webp")

# Command-line interface of the packer script.
parser = argparse.ArgumentParser(description="mautrix-telegram unicode emoji packer")
# Telegram API credentials, passed to TelegramClient in main().
parser.add_argument(
    "-i", "--api-id", type=int, required=True, metavar="<api id>", help="Telegram API ID"
)
parser.add_argument(
    "-a", "--api-hash", type=str, required=True, metavar="<api hash>", help="Telegram API hash"
)
# Telethon session name, passed to TelegramClient in main().
parser.add_argument(
    "-s",
    "--session",
    type=str,
    default="unicodemojipacker.session",
    metavar="<file name>",
    help="Telethon session name",
)
# JSON file that receives the emoji -> document ID mapping; main() also derives
# the path of the pickle copy from this value.
parser.add_argument(
    "-o",
    "--output",
    type=str,
    default="mautrix_telegram/unicodemojipack.json",
    metavar="<file name>",
    help="Path to save created emoji pack document IDs",
)
# Root directory of the source SVG files read by parse_emoji_data().
parser.add_argument(
    "-f",
    "--font-directory",
    type=Path,
    required=True,
    metavar="<directory path>",
    help="Path to the Noto color emoji files",
)
# Directory for converted emoji files; the conversion and split caches are stored here too.
parser.add_argument(
    "-m",
    "--media-directory",
    type=Path,
    required=True,
    metavar="<directory path>",
    help="Path to save converted tgs and webp emoji files",
)

# NOTE: arguments are parsed at module level, so this runs as soon as the file is executed.
args = parser.parse_args()
font_dir: Path = args.font_directory
media_dir: Path = args.media_directory

# JSON emoji metadata that load_emoji_data() downloads when no conversion cache exists.
EMOJI_DATA_URL = "https://raw.githubusercontent.com/iamcal/emoji-data/master/emoji.json"
def unified_to_unicode(unified: str) -> str:
    """Turn a dash-separated list of hex code points into the string it spells.

    Every chunk is left-padded with zeros to eight hex digits, written as a
    32-bit unicode escape sequence and then decoded in one go.
    """
    escapes = ["\\U" + code_point.rjust(8, "0") for code_point in unified.split("-")]
    escaped_text = "".join(escapes)
    return escaped_text.encode("ascii").decode("unicode_escape")
def tag_to_str(unified: str) -> str:
    """Convert a dash-separated list of hex values into text, dropping a leading ``E00``.

    Each chunk has the ``E00`` prefix removed (when present) and the remainder is
    parsed as a hexadecimal code point.
    """
    characters = []
    for tag_hex in unified.split("-"):
        code_point = int(tag_hex.removeprefix("E00"), 16)
        characters.append(chr(code_point))
    return "".join(characters)
# File format of a converted emoji; also used as the file extension in the media directory.
EmojiType = Literal["webp", "tgs"]
# Pack type label; create_pack() sends this value to the bot as a chat message.
PackType = Literal["Animated emoji", "Static emoji"]
class Emoji(TypedDict):
    """One converted emoji, as returned by parse_emoji_data()."""

    # Dash-separated hex code points with "-FE0F" removed.
    hex: str
    # The emoji as a unicode string.
    emoji: str
    # Format the emoji was converted to.
    type: EmojiType
    # Name of the converted file inside the media directory.
    filename: str
class EmojiData(TypedDict):
    """All converted emojis grouped by output format, as returned by load_emoji_data()."""

    # Emojis that were exported as tgs.
    tgs: list[Emoji]
    # Emojis that fell back to webp.
    webp: list[Emoji]
def parse_emoji_data(tone: dict[str, Any], emoji: dict[str, Any]) -> Emoji:
    """Convert the SVG of one emoji (or skin tone variation) into a tgs or webp file.

    The SVG is imported with lottie and exported as tgs. If the tgs validator
    reports anything more severe than a note, or a region flag is larger than
    32768 bytes, the image is rendered to a 100x100 webp instead. The result
    is written to the media directory.

    :param tone: emoji-data entry of the exact variation to convert.
    :param emoji: emoji-data entry of the base emoji (provides category info).
    :return: Metadata describing the converted file.
    """
    # Named hex_code instead of hex so the builtin is not shadowed.
    hex_code = (tone["non_qualified"] or tone["unified"]).replace("-FE0F", "")
    filename_hex = hex_code.replace("-", "_").lower()
    filename = f"svg/emoji_u{filename_hex}.svg"
    if emoji["category"] == "Flags" and emoji["subcategory"] in (
        "country-flag",
        "subdivision-flag",
    ):
        # Country and subdivision flags live in a separate directory of the font repo.
        filename = f"third_party/region-flags/waved-svg/emoji_u{filename_hex}.svg"

    # Read the SVG with an explicit encoding instead of the locale default.
    with (font_dir / filename).open(encoding="utf-8") as f:
        lot: Animation = import_svg(f)
    float_strip(lot)
    lot.tgs_sanitize()
    output = io.BytesIO()
    export_tgs(lot, output)
    tgs_size = len(output.getvalue())

    validator = TgsValidator()
    validator(lot)
    validator.check_size(tgs_size)
    # Notes are informational only, everything else disqualifies the tgs export.
    errors = [err for err in validator.errors if err.severity != Severity.Note]

    media_type: EmojiType
    if errors or ("region-flags" in filename and tgs_size > 32768):
        # Fall back to a static 100x100 webp rendered through PNG.
        lot.scale(100, 100)
        png_out = io.BytesIO()
        export_png(lot, png_out)
        img = Image.open(png_out)
        output = io.BytesIO()
        output.name = "image.webp"
        img.save(output, "webp")
        media_type = "webp"
    else:
        media_type = "tgs"

    path = media_dir / f"{filename_hex}.{media_type}"
    with path.open("wb") as f:
        f.write(output.getvalue())
    print(
        "Converted", filename, "->", path.name, "//" if errors else "", "\n".join(map(str, errors))
    )
    return {
        "hex": hex_code,
        "emoji": unified_to_unicode(tone["unified"]),
        "type": media_type,
        "filename": path.name,
    }
async def load_emoji_data() -> EmojiData:
    """Return the converted emoji list, converting everything on the first run.

    The result is cached in ``conversion-cache.json`` inside the media
    directory. When the cache exists it is returned as-is; otherwise the emoji
    metadata is downloaded, every emoji and skin tone variation is converted
    with parse_emoji_data(), and the cache is written.

    :return: Converted emojis grouped by output format.
    """
    cache_path = media_dir / "conversion-cache.json"
    try:
        # The cache contains raw (non-escaped) emoji, so the encoding must be
        # explicit rather than whatever the locale default happens to be.
        with cache_path.open(encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        pass

    async with aiohttp.ClientSession() as sess, sess.get(EMOJI_DATA_URL) as resp:
        # content_type=None disables aiohttp's content type check for the response.
        raw_emoji_data = sorted(
            await resp.json(content_type=None),
            key=lambda dat: dat["sort_order"],
        )

    tgs_emoji = []
    webp_emoji = []
    for emoji in raw_emoji_data:
        # Convert the base emoji followed by each of its skin tone variations.
        for tone in (emoji, *emoji.get("skin_variations", {}).values()):
            parsed_emoji = parse_emoji_data(tone, emoji)
            if parsed_emoji["type"] == "tgs":
                tgs_emoji.append(parsed_emoji)
            else:
                webp_emoji.append(parsed_emoji)

    full_data = {"tgs": tgs_emoji, "webp": webp_emoji}
    with cache_path.open("w", encoding="utf-8") as f:
        json.dump(full_data, f, ensure_ascii=False)
    return full_data
async def create_pack(conv: Conversation, name: str, pack_type: str) -> None:
    """Walk the bot dialogue that creates a new, still empty emoji pack.

    Sends ``/newemojipack``, then the pack type, then the pack name, and checks
    after every message that the reply contains the expected text.

    :param conv: Open conversation with the bot.
    :param name: Display name of the new pack.
    :param pack_type: ``"Animated emoji"`` or any other value for the static flow.
    :raises AssertionError: If a reply does not contain the expected text. This is
        raised explicitly so the checks still run under ``python -O``.
    """
    animated = pack_type == "Animated emoji"
    # (message to send, fragments that must all appear in the reply)
    dialogue = (
        ("/newemojipack", ("A new set of custom emoji", "Please choose the type")),
        (
            pack_type,
            (
                "When ready to upload, tell me the name of your set."
                if animated
                else "Now choose a name for your set.",
            ),
        ),
        (
            name,
            (
                "Now send me the first animated emoji"
                if animated
                else "Now send me the custom emoji",
            ),
        ),
    )
    for outgoing, expected_fragments in dialogue:
        await conv.send_message(outgoing)
        resp: Message = await conv.get_response()
        for fragment in expected_fragments:
            if fragment not in resp.raw_text:
                raise AssertionError(
                    f"Unexpected reply to {outgoing!r}: "
                    f"expected {fragment!r}, got {resp.raw_text!r}"
                )
async def publish_pack(conv: Conversation, shortname: str) -> None:
    """Walk the bot dialogue that publishes the pack currently being edited.

    Sends ``/publish``, skips the icon step with ``/skip`` and finally sends the
    short name, checking the reply after every message.

    :param conv: Open conversation with the bot.
    :param shortname: Short name to publish the pack under.
    :raises AssertionError: If a reply does not contain the expected text. This is
        raised explicitly so the checks still run under ``python -O``.
    """
    # (message to send, text that must appear in the reply)
    dialogue = (
        ("/publish", "You can send me a custom emoji from your emoji set"),
        ("/skip", "Please provide a short name for your emoji set"),
        (shortname, "I've just published your emoji set"),
    )
    for outgoing, expected in dialogue:
        await conv.send_message(outgoing)
        resp: Message = await conv.get_response()
        if expected not in resp.raw_text:
            raise AssertionError(
                f"Unexpected reply to {outgoing!r}: expected {expected!r}, got {resp.raw_text!r}"
            )
async def send_emoji(
    conv: Conversation, file: bytes | Path | InputMediaUploadedDocument, emoji: str
) -> None:
    """Upload one custom emoji file to the bot and tell it which unicode emoji it replaces.

    If the bot reports too many attempts, the operator is asked to press enter
    before the emoji is sent again. If the bot rejects the replacement emoji,
    the operator is asked for another one until the bot accepts it.

    :param conv: Open conversation with the bot.
    :param file: File to upload as the custom emoji.
    :param emoji: Unicode emoji the custom emoji corresponds to.
    :raises AssertionError: If a reply does not contain the expected text. This is
        raised explicitly so the checks still run under ``python -O``.
    """
    await conv.send_file(file)
    resp: Message = await conv.get_response()
    expected = "Send me a replacement emoji that corresponds to your custom emoji"
    if expected not in resp.raw_text:
        raise AssertionError(
            f"Unexpected reply to uploaded file: expected {expected!r}, got {resp.raw_text!r}"
        )

    await conv.send_message(emoji)
    resp = await conv.get_response()
    if "Sorry, too many attempts" in resp.raw_text:
        print(resp.raw_text)
        # Wait for the operator in a worker thread so the event loop (and with it
        # the Telegram connection) keeps running in the meantime.
        await asyncio.to_thread(input, "Press enter to continue")
        await conv.send_message(emoji)
        resp = await conv.get_response()
    while "Please send an emoji that best describes your custom emoji." in resp.raw_text:
        emoji = await asyncio.to_thread(input, f"{emoji} was rejected, provide replacement: ")
        await conv.send_message(emoji)
        resp = await conv.get_response()
    if "Congratulations" not in resp.raw_text:
        raise AssertionError(
            f"Unexpected reply to {emoji!r}: expected 'Congratulations', got {resp.raw_text!r}"
        )
class CachedPack(TypedDict):
    """One emoji pack and its progress, as stored in the split cache file."""

    # Display name, formatted as "mautrix-telegram unicodemoji (<part>/<total>)".
    name: str
    # Short name, "mxtg_unicodemoji_" followed by 8 random lowercase letters or digits.
    short_name: str
    # 1-based index of this pack across all packs.
    part: int
    # Pack type label sent to the bot when the pack is created.
    type: PackType
    # Set to True by main() once the pack has been created, filled and published.
    published: bool
    # Set to True by main() once the document IDs of the pack have been stored.
    collected: bool
    # Emojis that belong to this pack, in upload order.
    emojis: list[Emoji]
class CachedData(TypedDict):
    """Wrapper around a list of packs.

    NOTE(review): nothing in this script appears to use this type; main() stores
    the pack list directly. Confirm before relying on it.
    """

    packs: list[CachedPack]
def _split_packs_int(
    emoji_list: list[Emoji],
    pack_type: PackType,
    current_part: int,
    total_parts: int,
    pack_size: int = 200,
) -> tuple[list[CachedPack], int]:
    """Split one list of emojis into consecutive packs of at most ``pack_size`` entries.

    :param emoji_list: Emojis to distribute, in order.
    :param pack_type: Type label stored in every created pack.
    :param current_part: Number of packs that were already created before this call.
    :param total_parts: Total number of packs, only used in the display name.
    :param pack_size: Maximum number of emojis per pack.
    :return: The created packs and the updated part counter.
    """
    packs: list[CachedPack] = []
    for start in range(0, len(emoji_list), pack_size):
        current_part += 1
        random_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
        packs.append(
            {
                "type": pack_type,
                "short_name": f"mxtg_unicodemoji_{random_id}",
                "part": current_part,
                "name": f"mautrix-telegram unicodemoji ({current_part}/{total_parts})",
                "published": False,
                "collected": False,
                "emojis": emoji_list[start : start + pack_size],
            }
        )
    return packs, current_part


def split_packs(emoji_data: EmojiData, pack_size: int = 200) -> list[CachedPack]:
    """Distribute all converted emojis into packs, animated packs first.

    The pack size is a single parameter shared by the part count and the actual
    splitting, so the "(part/total)" numbering in pack names cannot drift.

    :param emoji_data: Converted emojis grouped by output format.
    :param pack_size: Maximum number of emojis per pack.
    :return: Animated (tgs) packs followed by static (webp) packs.
    :raises ValueError: If ``pack_size`` is smaller than 1.
    """
    if pack_size < 1:
        raise ValueError("pack_size must be at least 1")
    total_parts = sum(
        math.ceil(len(emoji_data[kind]) / pack_size) for kind in ("tgs", "webp")
    )
    animated_packs, current_part = _split_packs_int(
        emoji_data["tgs"], "Animated emoji", 0, total_parts, pack_size
    )
    static_packs, _ = _split_packs_int(
        emoji_data["webp"], "Static emoji", current_part, total_parts, pack_size
    )
    return animated_packs + static_packs
async def create_and_fill_pack(
    client: TelegramClient,
    conv: Conversation,
    pack: CachedPack,
    resume_short_name: str | None = "mxtg_unicodemoji_xvzs6743",
    resume_from: int = 87,
) -> None:
    """Create one pack through the bot, upload all of its emojis and publish it.

    A pack whose short name equals ``resume_short_name`` is treated as already
    created and partially filled: the creation dialogue is skipped and only the
    emojis from index ``resume_from`` onwards are uploaded.

    NOTE(review): the defaults are the values that used to be hard-coded in the
    body (apparently left over from resuming one interrupted run); they are kept
    so existing behaviour does not change. Pass ``resume_short_name=None`` to
    disable resuming.

    :param client: Telegram client, used to upload webp files.
    :param conv: Open conversation with the bot.
    :param pack: Pack to create and fill.
    :param resume_short_name: Short name of a pack to continue instead of create.
    :param resume_from: Index of the first emoji to upload when continuing.
    """
    resuming = resume_short_name is not None and pack["short_name"] == resume_short_name
    if resuming:
        print("Continuing pack", pack["name"])
    else:
        print("Creating pack", pack["name"])
        await create_pack(conv, pack["name"], pack["type"])

    total = len(pack["emojis"])
    for i, emoji in enumerate(pack["emojis"]):
        if resuming and i < resume_from:
            continue
        print(f"Adding emoji {i+1}/{total}", emoji["hex"], emoji["emoji"])
        emoji_file = media_dir / emoji["filename"]
        if emoji["type"] == "webp":
            # webp files are uploaded first and sent as a document with explicit
            # attributes; other files are sent by path.
            attrs = [
                DocumentAttributeImageSize(w=100, h=100),
                DocumentAttributeFilename(file_name="image.webp"),
            ]
            with emoji_file.open("rb") as f:
                file_handle = await client.upload_file(f, file_name="emoji.webp")
            emoji_file = InputMediaUploadedDocument(
                file_handle, mime_type="image/webp", attributes=attrs
            )
        await send_emoji(conv, emoji_file, emoji["emoji"])
        # Pause between uploads.
        await asyncio.sleep(2)

    print("Publishing pack", pack["short_name"])
    await publish_pack(conv, pack["short_name"])
async def main():
    """Run the whole flow: convert emojis, create the packs and store the document IDs.

    Progress is cached in the media directory (``split-cache.json``) so an
    interrupted run can continue. The emoji -> document ID mapping is written
    as JSON to the ``--output`` path and as a pickle next to it.
    """
    logging.basicConfig(level=logging.INFO)
    emoji_data = await load_emoji_data()

    # The JSON files contain raw emoji (ensure_ascii=False), so always use an
    # explicit encoding instead of the locale default.
    split_cache = media_dir / "split-cache.json"
    try:
        with split_cache.open(encoding="utf-8") as f:
            packs: list[CachedPack] = json.load(f)
    except FileNotFoundError:
        packs = split_packs(emoji_data)
        with split_cache.open("w", encoding="utf-8") as f:
            json.dump(packs, f)

    doc_id_file = Path(args.output)
    try:
        with doc_id_file.open(encoding="utf-8") as f:
            doc_ids = json.load(f)
    except FileNotFoundError:
        doc_ids = {}

    client = TelegramClient(args.session, args.api_id, args.api_hash, flood_sleep_threshold=3600)
    await client.start()
    async with client.conversation("Stickers", max_messages=20000) as conv:
        for pack in packs:
            if not pack["published"]:
                await create_and_fill_pack(client, conv, pack)
                pack["published"] = True
                with split_cache.open("w", encoding="utf-8") as f:
                    json.dump(packs, f, ensure_ascii=False)
            # NOTE(review): "or True" makes this condition always true, so document
            # IDs are re-collected for every pack on every run and the "collected"
            # flag is effectively ignored. Kept as-is; confirm whether it is intended.
            if not pack["collected"] or True:
                print("Collecting document IDs from pack", pack["short_name"])
                stickers = await client(
                    GetStickerSetRequest(InputStickerSetShortName(pack["short_name"]), 0)
                )
                doc: Document
                for i, doc in enumerate(stickers.documents):
                    attr = next(
                        attr
                        for attr in doc.attributes
                        if isinstance(attr, DocumentAttributeCustomEmoji)
                    )
                    base_emoji = attr.alt.replace("\ufe0f", "")
                    # Documents are matched to the cached emojis by position.
                    emoji = pack["emojis"][i]["emoji"].replace("\ufe0f", "")
                    doc_ids[emoji] = doc.id
                    print(f"Mapped {emoji} (fallback: {base_emoji}) -> {doc_ids[emoji]}")
                pack["collected"] = True
                with split_cache.open("w", encoding="utf-8") as f:
                    json.dump(packs, f, ensure_ascii=False)
                with doc_id_file.open("w", encoding="utf-8") as f:
                    json.dump(doc_ids, f, ensure_ascii=False)
            print("Pack completed")
            await asyncio.sleep(5)

    # Swap the suffix instead of string-replacing ".json": with an output path
    # that does not contain ".json" the replace() was a no-op and the pickle
    # overwrote the JSON file that had just been written.
    pickle_file = doc_id_file.with_suffix(".pickle")
    with pickle_file.open("wb") as f:
        pickle.dump(doc_ids, f)
    print("Wrote pickle")
# Script entry point: run the complete packing flow.
asyncio.run(main())
File diff suppressed because one or more lines are too long
Binary file not shown.
+35 -1
View File
@@ -18,15 +18,16 @@ from __future__ import annotations
from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterable, Awaitable, NamedTuple, cast
from datetime import datetime, timezone
import asyncio
import time
from telethon.errors import AuthKeyDuplicatedError, RPCError, UnauthorizedError
from telethon.tl.custom import Dialog
from telethon.tl.functions.account import UpdateStatusRequest
from telethon.tl.functions.contacts import GetContactsRequest, SearchRequest
from telethon.tl.functions.messages import GetAvailableReactionsRequest
from telethon.tl.functions.updates import GetStateRequest
from telethon.tl.functions.users import GetUsersRequest
from telethon.tl.types import (
Channel,
Chat,
ChatForbidden,
InputUserSelf,
@@ -43,6 +44,7 @@ from telethon.tl.types import (
User as TLUser,
)
from telethon.tl.types.contacts import ContactsNotModified
from telethon.tl.types.messages import AvailableReactions
from mautrix.appservice import DOUBLE_PUPPET_SOURCE_KEY
from mautrix.bridge import BaseUser, async_getter_lock
@@ -85,6 +87,11 @@ class User(DBUser, AbstractUser, BaseUser):
_track_connection_task: asyncio.Task | None
_is_backfilling: bool
_available_emoji_reactions: set[str] | None
_available_emoji_reactions_hash: int | None
_available_emoji_reactions_fetched: float
_available_emoji_reactions_lock: asyncio.Lock
def __init__(
self,
mxid: UserID,
@@ -110,6 +117,10 @@ class User(DBUser, AbstractUser, BaseUser):
self._track_connection_task = None
self._is_backfilling = False
self._portals_cache = None
self._available_emoji_reactions = None
self._available_emoji_reactions_hash = None
self._available_emoji_reactions_fetched = 0
self._available_emoji_reactions_lock = asyncio.Lock()
(
self.relaybot_whitelisted,
@@ -700,6 +711,29 @@ class User(DBUser, AbstractUser, BaseUser):
self.log.debug("Contact syncing complete")
return contacts
async def get_available_reactions(self) -> set[str]:
    """Return the set of emoji reactions this user may send, refreshing it every 12 hours.

    The cached set is only trusted once it has actually been populated.
    ``time.monotonic()`` has an undefined reference point, so comparing it to
    the initial timestamp of 0 could report a "fresh" cache that is still
    ``None``.

    :return: Emoji strings of the available reactions. Premium-only reactions
        are left out unless the user is premium.
    """
    max_age = 12 * 60 * 60

    def cache_is_fresh() -> bool:
        return (
            self._available_emoji_reactions is not None
            and self._available_emoji_reactions_fetched + max_age > time.monotonic()
        )

    if cache_is_fresh():
        return self._available_emoji_reactions
    async with self._available_emoji_reactions_lock:
        # Another task may have refreshed the cache while this one waited for the lock.
        if cache_is_fresh():
            return self._available_emoji_reactions
        self.log.debug("Fetching available emoji reactions")
        available_reactions = await self.client(
            GetAvailableReactionsRequest(hash=self._available_emoji_reactions_hash or 0)
        )
        if isinstance(available_reactions, AvailableReactions):
            self._available_emoji_reactions = {
                react.reaction
                for react in available_reactions.reactions
                if self.is_premium or not react.premium
            }
            self._available_emoji_reactions_hash = available_reactions.hash
            self.log.debug(
                "Got available emoji reactions: %s", self._available_emoji_reactions
            )
        # Any other response type leaves the cached set untouched. The timestamp
        # is refreshed in both cases so an unchanged list is not re-requested on
        # every single call.
        self._available_emoji_reactions_fetched = time.monotonic()
        return self._available_emoji_reactions
# endregion
# region Class instance lookup
+7 -1
View File
@@ -1,4 +1,10 @@
from .color_log import ColorFormatter
from .file_transfer import convert_image, transfer_custom_emojis_to_matrix, transfer_file_to_matrix
from .file_transfer import (
UnicodeCustomEmoji,
convert_image,
transfer_custom_emojis_to_matrix,
transfer_file_to_matrix,
unicode_custom_emoji_map,
)
from .parallel_file_transfer import parallel_transfer_to_telegram
from .recursive_dict import recursive_del, recursive_get, recursive_set
+28 -4
View File
@@ -15,11 +15,13 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import annotations
from typing import Optional, Union
from typing import NamedTuple, Optional, Union
from io import BytesIO
from sqlite3 import IntegrityError
import asyncio
import logging
import pickle
import pkgutil
import tempfile
import time
@@ -44,7 +46,7 @@ from telethon.tl.types import (
)
from mautrix.appservice import IntentAPI
from mautrix.util import magic
from mautrix.util import magic, variation_selector
from .. import abstract_user as au
from ..db import TelegramFile as DBTelegramFile
@@ -212,15 +214,37 @@ async def transfer_thumbnail_to_matrix(
transfer_locks: dict[str, asyncio.Lock] = {}
# Unicode emoji -> custom emoji document ID, loaded once at import time from the
# pickle that is shipped inside the mautrix_telegram package.
# NOTE: pickle.loads is only acceptable here because the data is bundled with the
# package; never point this at externally supplied files.
unicode_custom_emoji_map = pickle.loads(
    pkgutil.get_data("mautrix_telegram", "unicodemojipack.pickle")
)
# Inverse lookup: custom emoji document ID -> unicode emoji.
reverse_unicode_custom_emoji_map = {
    doc_id: emoji for emoji, doc_id in unicode_custom_emoji_map.items()
}
TypeThumbnail = Optional[Union[TypeLocation, TypePhotoSize]]
class UnicodeCustomEmoji(NamedTuple):
    """Stand-in for a custom emoji document ID that maps back to a plain unicode emoji."""

    # The unicode emoji to use in place of the custom emoji.
    emoji: str
async def transfer_custom_emojis_to_matrix(
source: au.AbstractUser, emoji_ids: list[int]
) -> dict[int, DBTelegramFile]:
) -> dict[int, DBTelegramFile | UnicodeCustomEmoji]:
emoji_ids = set(emoji_ids)
existing_unicode = {}
for emoji_id in emoji_ids:
try:
existing_unicode[emoji_id] = UnicodeCustomEmoji(
variation_selector.add(reverse_unicode_custom_emoji_map[emoji_id])
)
except KeyError:
pass
emoji_ids -= existing_unicode.keys()
if not emoji_ids:
return existing_unicode
existing = await DBTelegramFile.get_many([str(id) for id in emoji_ids])
file_map = {int(file.id): file for file in existing}
file_map = {int(file.id): file for file in existing} | existing_unicode
not_existing_ids = list(emoji_ids - file_map.keys())
if not_existing_ids:
log.debug(f"Transferring custom emojis through {source.mxid}: {not_existing_ids}")
+1 -1
View File
@@ -66,7 +66,7 @@ setuptools.setup(
],
package_data={"mautrix_telegram": [
"web/public/*.mako", "web/public/*.png", "web/public/*.css",
"example-config.yaml",
"example-config.yaml", "unicodemojipack.pickle",
]},
data_files=[
(".", ["mautrix_telegram/example-config.yaml"]),