diff --git a/mautrix_telegram/formatter/from_matrix/parser.py b/mautrix_telegram/formatter/from_matrix/parser.py
index bf9b44d1..162d12b8 100644
--- a/mautrix_telegram/formatter/from_matrix/parser.py
+++ b/mautrix_telegram/formatter/from_matrix/parser.py
@@ -18,15 +18,15 @@ from typing import List, Tuple, Pattern
import re
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
- MessageEntityMentionName as MentionName, MessageEntityEmail as Email,
- MessageEntityUrl as URL, MessageEntityTextUrl as TextURL,
+ MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
+ MessageEntityEmail as Email, MessageEntityTextUrl as TextURL,
MessageEntityBold as Bold, MessageEntityItalic as Italic,
MessageEntityCode as Code, MessageEntityPre as Pre,
- TypeMessageEntity)
+ MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
+ MessageEntityBlockquote as Blockquote, TypeMessageEntity)
from ... import user as u, puppet as pu, portal as po
from ...types import MatrixUserID
-from ..util import html_to_unicode
from .telegram_message import TelegramMessage, Entity, offset_length_multiply
from .html_reader import HTMLNode, read_html
@@ -101,13 +101,6 @@ class MatrixParser:
children.append(child)
return TelegramMessage.join(children, "\n")
- @classmethod
- def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
- msg = cls.tag_aware_parse_node(node, ctx)
- children = msg.trim().split("\n")
- children = [child.prepend("> ") for child in children]
- return TelegramMessage.join(children, "\n")
-
@classmethod
def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
children = cls.node_to_tmessages(node, ctx)
@@ -122,15 +115,14 @@ class MatrixParser:
msg.format(Bold)
elif node.tag in ("i", "em"):
msg.format(Italic)
+ elif node.tag in ("s", "strike", "del"):
+ msg.format(Strike)
+ elif node.tag in ("u", "ins"):
+ msg.format(Underline)
+ elif node.tag == "blockquote":
+ msg.format(Blockquote)
elif node.tag == "command":
msg.format(Command)
- elif node.tag in ("s", "strike", "del"):
- msg.text = html_to_unicode(msg.text, "\u0336")
- elif node.tag in ("u", "ins"):
- msg.text = html_to_unicode(msg.text, "\u0332")
-
- if node.tag in ("s", "strike", "del", "u", "ins"):
- msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2))
return msg
@@ -171,9 +163,7 @@ class MatrixParser:
@classmethod
def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
- if node.tag == "blockquote":
- return cls.blockquote_to_tmessage(node, ctx)
- elif node.tag == "ol":
+ if node.tag == "ol":
return cls.list_to_tmessage(node, ctx)
elif node.tag == "ul":
return cls.list_to_tmessage(node, ctx.enter_list())
@@ -181,7 +171,8 @@ class MatrixParser:
return cls.header_to_tmessage(node, ctx)
elif node.tag == "br":
return TelegramMessage("\n")
- elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"):
+ elif node.tag in ("b", "strong", "i", "em", "s", "strike", "del", "u", "ins",
+ "blockquote", "command"):
return cls.basic_format_to_tmessage(node, ctx)
elif node.tag == "a":
return cls.link_to_tstring(node, ctx)
diff --git a/mautrix_telegram/formatter/from_telegram.py b/mautrix_telegram/formatter/from_telegram.py
index ccbe82e7..62e9ff8a 100644
--- a/mautrix_telegram/formatter/from_telegram.py
+++ b/mautrix_telegram/formatter/from_telegram.py
@@ -24,7 +24,8 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M
MessageEntityItalic, MessageEntityCode, MessageEntityPre,
MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag,
MessageEntityPhone, TypeMessageEntity, Message, PeerChannel,
- MessageFwdHeader, PeerUser)
+ MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader,
+ MessageEntityUnderline, PeerUser)
from mautrix_appservice import MatrixRequestError
from mautrix_appservice.intent_api import IntentAPI
@@ -33,7 +34,7 @@ from .. import user as u, puppet as pu, portal as po
from ..types import TelegramID
from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
- trim_reply_fallback_text, unicode_to_html)
+ trim_reply_fallback_text)
if TYPE_CHECKING:
from ..abstract_user import AbstractUser
@@ -194,9 +195,6 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
text += f"\n- {evt.post_author}"
html += f"
- {evt.post_author}"
- html = unicode_to_html(text, html, "\u0336", "del")
- html = unicode_to_html(text, html, "\u0332", "u")
-
if html:
html = html.replace("\n", "
")
@@ -214,29 +212,43 @@ def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEnti
return "[failed conversion in _telegram_entities_to_matrix]"
-def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str:
+def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity],
+ offset: int = 0, length: int = None) -> str:
if not entities:
- return text
+ return escape(text)
+ if length is None:
+ length = len(text)
html = []
last_offset = 0
- for entity in entities:
- if entity.offset > last_offset:
- html.append(escape(text[last_offset:entity.offset]))
- elif entity.offset < last_offset:
+ for i, entity in enumerate(entities):
+ if entity.offset > offset + length:
+ break
+ relative_offset = entity.offset - offset
+ if relative_offset > last_offset:
+ html.append(escape(text[last_offset:relative_offset]))
+ elif relative_offset < last_offset:
continue
skip_entity = False
- entity_text = escape(text[entity.offset:entity.offset + entity.length])
+ entity_text = _telegram_entities_to_matrix(
+ text=text[relative_offset:relative_offset + entity.length],
+ entities=entities[i + 1:], offset=entity.offset, length=entity.length)
entity_type = type(entity)
if entity_type == MessageEntityBold:
html.append(f"<strong>{entity_text}</strong>")
elif entity_type == MessageEntityItalic:
html.append(f"<em>{entity_text}</em>")
+ elif entity_type == MessageEntityUnderline:
+ html.append(f"<u>{entity_text}</u>")
+ elif entity_type == MessageEntityStrike:
+ html.append(f"<del>{entity_text}</del>")
+ elif entity_type == MessageEntityBlockquote:
+ html.append(f"<blockquote>{entity_text}</blockquote>")
elif entity_type == MessageEntityCode:
- html.append(("<pre><code>{entity_text}</code></pre>"
- if "\n" in entity_text
- else "<code>{entity_text}</code>").format(entity_text=entity_text))
+ html.append(f"<pre><code>{entity_text}</code></pre>"
+ if "\n" in entity_text
+ else f"<code>{entity_text}</code>")
elif entity_type == MessageEntityPre:
skip_entity = _parse_pre(html, entity_text, entity.language)
elif entity_type == MessageEntityMention:
@@ -254,8 +266,8 @@ def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -
html.append(f"{entity_text}")
else:
skip_entity = True
- last_offset = entity.offset + (0 if skip_entity else entity.length)
- html.append(text[last_offset:])
+ last_offset = relative_offset + (0 if skip_entity else entity.length)
+ html.append(escape(text[last_offset:]))
return "".join(html)
diff --git a/mautrix_telegram/formatter/util.py b/mautrix_telegram/formatter/util.py
index b0456f51..4ac01284 100644
--- a/mautrix_telegram/formatter/util.py
+++ b/mautrix_telegram/formatter/util.py
@@ -20,38 +20,6 @@ import struct
import re
-def unicode_to_html(text: str, html: str, ctrl: str, tag: str) -> str:
- if ctrl not in text:
- return html
- if not html:
- html = escape(text)
- tag_start = f"<{tag}>"
- tag_end = f"</{tag}>"
- characters = html.split(ctrl)
- html = ""
- in_tag = False
- for char in characters:
- if not in_tag:
- if len(char) > 1:
- html += char[0:-1]
- char = char[-1]
- html += tag_start
- in_tag = True
- html += char
- else:
- if len(char) > 1:
- html += tag_end
- in_tag = False
- html += char
- if in_tag:
- html += tag_end
- return html
-
-
-def html_to_unicode(text: str, ctrl: str) -> str:
- return ctrl.join(text) + ctrl
-
-
# add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon.
# Licensed under the MIT license.
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py
diff --git a/setup.py b/setup.py
index df860716..0f757c09 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@ extras = {
"fast_crypto": ["cryptg>=0.1,<0.3"],
"webp_convert": ["Pillow>=4.3.0,<7"],
"hq_thumbnails": ["moviepy>=1.0,<2.0"],
- "metrics": ["prometheus-client>=0.6.0,<0.7.0"],
+ "metrics": ["prometheus-client>=0.6.0,<0.8.0"],
}
extras["all"] = list({dep for deps in extras.values() for dep in deps})
@@ -38,7 +38,7 @@ setuptools.setup(
"ruamel.yaml>=0.15.35,<0.16",
"future-fstrings>=0.4.2",
"python-magic>=0.4.15,<0.5",
- "telethon>=1.7,<1.9",
+ "telethon>=1.9,<1.10",
"telethon-session-sqlalchemy>=0.2.14,<0.3",
],
extras_require=extras,