Update Telethon and strip empty entities when sending to Telegram

This commit is contained in:
Tulir Asokan
2022-08-02 13:46:06 +03:00
parent bea9bc4ec0
commit 473ab17fe7
4 changed files with 19 additions and 10 deletions
@@ -18,7 +18,7 @@ from __future__ import annotations
import re
from telethon import TelegramClient
from telethon.helpers import add_surrogate, del_surrogate
from telethon.helpers import add_surrogate, del_surrogate, strip_text
from telethon.tl.types import MessageEntityItalic, TypeMessageEntity
from mautrix.types import MessageEventContent, RoomID
@@ -73,8 +73,8 @@ async def _matrix_html_to_telegram(
html = not_command_regex.sub(r"\1", html)
parsed = await MatrixParser(client).parse(add_surrogate(html))
text = del_surrogate(parsed.text.strip())
text, entities = _cut_long_message(text, parsed.telegram_entities)
text, entities = _cut_long_message(parsed.text, parsed.telegram_entities)
text = del_surrogate(strip_text(text, entities))
return text, entities
except Exception as e:
+9 -4
View File
@@ -28,6 +28,7 @@ from telethon.tl.types import (
MessageEntityBotCommand,
MessageEntityCashtag,
MessageEntityCode,
MessageEntityCustomEmoji,
MessageEntityEmail,
MessageEntityHashtag,
MessageEntityItalic,
@@ -133,7 +134,7 @@ async def telegram_to_matrix(
) -> TextMessageEventContent:
content = TextMessageEventContent(
msgtype=MessageType.TEXT,
body=add_surrogate(override_text or evt.message),
body=override_text or evt.message,
)
entities = override_entities or evt.entities
if entities:
@@ -196,8 +197,9 @@ async def _telegram_entities_to_matrix(
elif relative_offset < last_offset:
continue
while within_surrogate(text, relative_offset, length=length):
relative_offset += 1
# TODO this breaks when there are lots of emojis in a row (e.g. custom emojis)
# while within_surrogate(text, relative_offset, length=length):
# relative_offset += 1
while within_surrogate(text, relative_offset + entity.length, length=length):
entity.length += 1
@@ -241,6 +243,9 @@ async def _telegram_entities_to_matrix(
await _parse_url(
html, entity_text, entity.url if entity_type == MessageEntityTextUrl else None
)
elif entity_type == MessageEntityCustomEmoji:
# TODO support properly
html.append(entity_text)
elif entity_type in (
MessageEntityBotCommand,
MessageEntityHashtag,
@@ -318,7 +323,7 @@ message_link_regex = re.compile(
)
async def _parse_url(html: list[str], entity_text: str, url: str):
async def _parse_url(html: list[str], entity_text: str, url: str) -> None:
url = escape(url) if url else entity_text
if not url.startswith(("https://", "http://", "ftp://", "magnet://")):
url = "http://" + url
+6 -2
View File
@@ -125,9 +125,9 @@ def _read_video_thumbnail(
def _location_to_id(location: TypeLocation) -> str:
if isinstance(location, Document):
return f"{location.id}-{location.access_hash}"
return str(location.id)
elif isinstance(location, (InputDocumentFileLocation, InputPhotoFileLocation)):
return f"{location.id}-{location.access_hash}-{location.thumb_size}"
return f"{location.id}-{location.thumb_size}"
elif isinstance(location, InputFileLocation):
return f"{location.volume_id}-{location.local_id}"
elif isinstance(location, InputPeerPhotoFileLocation):
@@ -155,6 +155,8 @@ async def transfer_thumbnail_to_matrix(
if custom_data:
loc_id += "-mau_custom_thumbnail"
if encrypt:
loc_id += "-encrypted"
db_file = await DBTelegramFile.get(loc_id)
if db_file:
@@ -226,6 +228,8 @@ async def transfer_file_to_matrix(
location_id = _location_to_id(location)
if not location_id:
return None
if encrypt:
location_id += "-encrypted"
db_file = await DBTelegramFile.get(location_id)
if db_file:
+1 -1
View File
@@ -5,7 +5,7 @@ aiohttp>=3,<4
yarl>=1,<2
mautrix>=0.17.4,<0.18
#telethon>=1.24,<1.25
tulir-telethon==1.25.0a19
tulir-telethon==1.25.0a20
asyncpg>=0.20,<0.27
mako>=1,<2
setuptools