Merge pull request #196 from tulir/lxml-formatter

Add tree-based HTML parser for Matrix->Telegram formatting
This commit is contained in:
Tulir Asokan
2018-07-25 22:25:07 -04:00
committed by GitHub
6 changed files with 539 additions and 141 deletions
@@ -0,0 +1,156 @@
# -*- coding: future_fstrings -*-
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2018 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, List, Tuple, Callable, Pattern, Match, TYPE_CHECKING
import re
import logging
from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityItalic,
TypeMessageEntity)
from ... import puppet as pu
from ...db import Message as DBMessage
from ..util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text)
from .parser_common import ParsedMessage
try:
from mautrix_telegram.formatter.from_matrix.parser_lxml import parse_html
except ImportError:
from mautrix_telegram.formatter.from_matrix.parser_htmlparser import parse_html
if TYPE_CHECKING:
from ...context import Context
# Logger used by the Matrix->Telegram formatter.
log = logging.getLogger("mau.fmt.mx") # type: logging.Logger
# Whether displaynames written as plain text are turned into mentions; set by init_mx().
should_bridge_plaintext_highlights = False # type: bool
# Matches "!command" at the very start of a message; group 1 is the command without the "!".
command_regex = re.compile(r"^!([A-Za-z0-9@]+)") # type: Pattern
# Matches a backslash-escaped "\!command" at the very start; group 1 is the text after the "\".
not_command_regex = re.compile(r"^\\(![A-Za-z0-9@]+)") # type: Pattern
# Compiled by init_mx() from the configured displayname template; None until then.
plain_mention_regex = None # type: Optional[Pattern]
def plain_mention_to_html(match: Match) -> str:
    """Turn a plaintext displayname mention into a matrix.to link.

    Intended as the replacement callback for ``plain_mention_regex.sub``.

    :param match: A match whose group 1 is the leading whitespace (or an empty
                  string at the start of the text) and whose group 2 is the
                  candidate displayname.
    :return: Group 1 followed by an ``<a>`` tag pointing at the puppet's
             matrix.to URL when a puppet with that displayname exists,
             otherwise the matched text unchanged.
    """
    # Imported locally so the name ``html`` is not bound at module level.
    from html import escape

    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())

    # The puppet's fields are plain values rather than markup, so they must be
    # escaped before being spliced into the HTML that is parsed afterwards.
    mxid = escape(puppet.mxid, quote=True)
    displayname = escape(puppet.displayname)
    return (f"{match.group(1)}"
            f"<a href='https://matrix.to/#/{mxid}'>"
            f"{displayname}"
            "</a>")
def cut_long_message(message: str, entities: List[TypeMessageEntity]) -> ParsedMessage:
    """Truncate an over-long message and fix up its entities.

    Messages of up to 4096 characters are returned untouched. Longer ones are
    cut so that the text plus an italic " [message cut]" marker is exactly
    4096 characters long.

    :param message: The message text.
    :param entities: The entities applying to ``message``. Entities that cross
                     the cut point are shortened in place.
    :return: The (possibly truncated) text and the matching entity list.
    """
    max_length = 4096  # presumably Telegram's message length limit
    suffix = " [message cut]"
    cut_at = max_length - len(suffix)
    if len(message) <= max_length:
        return message, entities

    message = message[:cut_at] + suffix
    new_entities = []
    for entity in entities:
        # ">=" rather than ">": an entity starting exactly at the cut point has
        # no text left and would otherwise be kept with a length of zero.
        if entity.offset >= cut_at:
            continue
        if entity.offset + entity.length > cut_at:
            entity.length = cut_at - entity.offset
        new_entities.append(entity)
    new_entities.append(MessageEntityItalic(cut_at, len(suffix)))
    return message, new_entities
class FormatError(Exception):
    """Error raised when a Matrix message can't be converted to the Telegram format."""
def matrix_to_telegram(html: str) -> ParsedMessage:
    """Convert Matrix HTML into Telegram message text and entities.

    A leading ``!command`` is wrapped in a ``<command>`` tag, a leading
    ``\\!command`` loses its backslash, tabs become four spaces and, when
    enabled, plaintext displaynames are turned into mention links before the
    HTML is handed to the parser. The result is trimmed and cut to length.

    :param html: The HTML body of the Matrix message.
    :return: The Telegram text and the entities formatting it.
    :raises FormatError: If any step of the conversion fails; the original
                         exception is attached as the cause.
    """
    try:
        html = command_regex.sub(r"<command>\1</command>", html)
        html = html.replace("\t", " " * 4)
        html = not_command_regex.sub(r"\1", html)
        if should_bridge_plaintext_highlights:
            html = plain_mention_regex.sub(plain_mention_to_html, html)

        # Convert to the surrogate form exactly once and without rebinding
        # ``html``, so the error message below keeps the readable input.
        text, entities = parse_html(add_surrogates(html))
        text = remove_surrogates(text.strip())
        return cut_long_message(text, entities)
    except Exception as e:
        raise FormatError(f"Failed to convert Matrix format: {html}") from e
def matrix_reply_to_telegram(content: dict, tg_space: int, room_id: Optional[str] = None
                             ) -> Optional[int]:
    """Find the Telegram message ID that a Matrix reply points at.

    As a side effect the reply fallback is trimmed from ``content["body"]``
    and, for HTML messages, from ``content["formatted_body"]``.

    :param content: The content of the Matrix event; modified in place.
    :param tg_space: The ``tg_space`` value the stored message must have.
    :param room_id: The Matrix room ID. When empty, the ``room_id`` field of
                    the reply relation is used instead.
    :return: The ``tgid`` of the stored message that was replied to, or
             ``None`` if the event is not a reply, a required key is missing
             or no matching message is stored.
    """
    try:
        reply_to = content["m.relates_to"]["m.in_reply_to"]
        room_id = room_id or reply_to["room_id"]
        event_id = reply_to["event_id"]

        # The formatted body is optional, so missing keys are fine here.
        try:
            is_html = content["format"] == "org.matrix.custom.html"
            if is_html:
                content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"])
        except KeyError:
            pass
        content["body"] = trim_reply_fallback_text(content["body"])

        query = DBMessage.query.filter(DBMessage.mxid == event_id,
                                       DBMessage.tg_space == tg_space,
                                       DBMessage.mx_room == room_id)
        db_message = query.one_or_none()
        return db_message.tgid if db_message else None
    except KeyError:
        return None
def matrix_text_to_telegram(text: str) -> ParsedMessage:
    """Convert a plaintext Matrix message into Telegram text and entities.

    A leading ``!command`` becomes ``/command``, a leading ``\\!command``
    loses its backslash and tabs become four spaces. Plaintext displaynames
    are replaced with mentions only when that feature is enabled.

    :param text: The plaintext body of the Matrix message.
    :return: The converted text and the mention entities created for it.
    """
    with_commands = command_regex.sub(r"/\1", text)
    without_tabs = with_commands.replace("\t", " " * 4)
    text = not_command_regex.sub(r"\1", without_tabs)

    entities = []
    if should_bridge_plaintext_highlights:
        # The replacer appends to ``entities`` while the substitution runs.
        entities, pmr_replacer = plain_mention_to_text()
        text = plain_mention_regex.sub(pmr_replacer, text)
    return text, entities
def plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match], str]]:
    """Create a regex replacer that turns plaintext displaynames into mentions.

    The replacer is meant for a single ``plain_mention_regex.sub`` call: it
    keeps track of how much the earlier replacements changed the text length
    so that the offsets of later entities refer to the rewritten text.

    :return: A list and a replacer function. The list is filled with one
             mention entity per replaced displayname as the replacer runs.
    """
    entities = []  # type: List[TypeMessageEntity]
    # Length of the rewritten text minus length of the original text so far.
    shift = 0

    def replacer(match: Match) -> str:
        nonlocal shift
        puppet = pu.Puppet.find_by_displayname(match.group(2))
        if not puppet:
            return "".join(match.groups())

        # Group 1 (the leading whitespace) is kept in front of the mention and
        # is not part of the entity, so the entity starts where group 2 did.
        offset = match.start(2) + shift
        if puppet.username:
            text = f"@{puppet.username}"
            entity = MessageEntityMention(offset, len(text))
        else:
            text = puppet.displayname
            entity = MessageEntityMentionName(offset, len(text), user_id=puppet.tgid)
        entities.append(entity)
        shift += len(text) - len(match.group(2))
        return match.group(1) + text

    return entities, replacer
def init_mx(context: "Context") -> None:
    """Initialize the module-level plaintext mention settings from the config.

    Sets ``plain_mention_regex`` to a pattern matching the configured
    displayname template at the start of the text or after whitespace, and
    ``should_bridge_plaintext_highlights`` to the configured flag.

    :param context: Object whose ``config`` attribute supports ``get()`` with a
                    default and item access with dotted keys.
    """
    global plain_mention_regex, should_bridge_plaintext_highlights
    config = context.config
    dn_template = config.get("bridge.displayname_template", "{displayname} (Telegram)")
    # Escape the whole template, then swap the escaped placeholder for a
    # pattern matching the actual name.
    dn_pattern = re.escape(dn_template).replace(re.escape("{displayname}"), "[^>]+")
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    plain_mention_regex = re.compile(r"(\s|^)(" + dn_pattern + ")")
    should_bridge_plaintext_highlights = config["bridge.plaintext_highlights"] or False
@@ -0,0 +1,31 @@
# -*- coding: future_fstrings -*-
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2018 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import re
from typing import List, Tuple, Pattern
from telethon.tl.types import TypeMessageEntity
class MatrixParserCommon:
    """Constants shared by the Matrix HTML parser implementations."""

    # matrix.to links to users (group 1: the "@user:server" ID) and to rooms
    # (group 1: the "#alias:server" alias). The dot of the host name is escaped
    # so that it only matches a literal ".".
    mention_regex = re.compile(r"https://matrix\.to/#/(@.+:.+)")  # type: Pattern
    room_regex = re.compile(r"https://matrix\.to/#/(#.+:.+)")  # type: Pattern
    # Tags that the parsers treat as block-level elements.
    block_tags = ("br", "p", "pre", "blockquote",
                  "ol", "ul", "li",
                  "h1", "h2", "h3", "h4", "h5", "h6",
                  "div", "hr", "table")  # type: Tuple[str, ...]
# Result type of the parsers: the message text and the Telegram entities that apply to it.
ParsedMessage = Tuple[str, List[TypeMessageEntity]]
@@ -14,41 +14,31 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import (Optional, List, Tuple, Type, Callable, Dict, Any, Pattern, Deque, Match, TYPE_CHECKING)
from typing import (Optional, List, Tuple, Type, Dict, Any, Deque, Match)
from html import unescape
from html.parser import HTMLParser
from collections import deque
import math
import re
import logging
from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, MessageEntityEmail,
MessageEntityUrl, MessageEntityTextUrl, MessageEntityBold,
MessageEntityItalic, MessageEntityCode, MessageEntityPre,
MessageEntityBotCommand, TypeMessageEntity)
from .. import user as u, puppet as pu, portal as po
from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text, html_to_unicode)
if TYPE_CHECKING:
from ..context import Context
log = logging.getLogger("mau.fmt.mx") # type: logging.Logger
should_bridge_plaintext_highlights = False # type: bool
from ... import user as u, puppet as pu, portal as po
from ..util import html_to_unicode
from .parser_common import MatrixParserCommon, ParsedMessage
class MatrixParser(HTMLParser):
mention_regex = re.compile("https://matrix.to/#/(@.+:.+)") # type: Pattern
room_regex = re.compile("https://matrix.to/#/(#.+:.+)") # type: Pattern
block_tags = ("br", "p", "pre", "blockquote",
"ol", "ul", "li",
"h1", "h2", "h3", "h4", "h5", "h6",
"div", "hr", "table") # type: Tuple[str, ...]
def parse_html(html: str) -> ParsedMessage:
    """Convert Matrix HTML into Telegram text and entities with the HTMLParser-based parser.

    :param html: The HTML to convert.
    :return: The text and entities collected by the parser.
    """
    parser = MatrixParser()
    parser.feed(html)
    # feed() may keep the end of the input buffered while waiting for more data
    # (e.g. after a bare "&" near the end); close() forces it to be processed.
    parser.close()
    return parser.text, parser.entities
class MatrixParser(HTMLParser, MatrixParserCommon):
def __init__(self):
super().__init__()
super(MatrixParser, self).__init__()
self.text = "" # type: str
self.entities = [] # type: List[TypeMessageEntity]
self._building_entities = {} # type: Dict[str, TypeMessageEntity]
@@ -244,120 +234,3 @@ class MatrixParser(HTMLParser):
if tag in self.block_tags and tag != "br" and "blockquote" not in self._open_tags:
self._newline(allow_multi=tag == "br")
# Matches "!command" at the very start of a message; group 1 is the command without the "!".
command_regex = re.compile(r"^!([A-Za-z0-9@]+)") # type: Pattern
# Matches a backslash-escaped "\!command" at the very start; group 1 is the text after the "\".
not_command_regex = re.compile(r"^\\(![A-Za-z0-9@]+)") # type: Pattern
# Compiled by init_mx() from the configured displayname template; None until then.
plain_mention_regex = None # type: Optional[Pattern]
def plain_mention_to_html(match: Match) -> str:
    """Turn a plaintext displayname mention into a matrix.to link.

    Intended as the replacement callback for ``plain_mention_regex.sub``.

    :param match: A match whose group 1 is the leading whitespace (or an empty
                  string at the start of the text) and whose group 2 is the
                  candidate displayname.
    :return: Group 1 followed by an ``<a>`` tag pointing at the puppet's
             matrix.to URL when a puppet with that displayname exists,
             otherwise the matched text unchanged.
    """
    # Imported locally so the name ``html`` is not bound at module level.
    from html import escape

    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())

    # The puppet's fields are plain values rather than markup, so they must be
    # escaped before being spliced into the HTML that is parsed afterwards.
    mxid = escape(puppet.mxid, quote=True)
    displayname = escape(puppet.displayname)
    return (f"{match.group(1)}"
            f"<a href='https://matrix.to/#/{mxid}'>"
            f"{displayname}"
            "</a>")
def cut_long_message(message: str, entities: List[TypeMessageEntity]
                     ) -> Tuple[str, List[TypeMessageEntity]]:
    """Truncate an over-long message and fix up its entities.

    Messages of up to 4096 characters are returned untouched. Longer ones are
    cut so that the text plus an italic " [message cut]" marker is exactly
    4096 characters long.

    :param message: The message text.
    :param entities: The entities applying to ``message``. Entities that cross
                     the cut point are shortened in place.
    :return: The (possibly truncated) text and the matching entity list.
    """
    max_length = 4096  # presumably Telegram's message length limit
    suffix = " [message cut]"
    cut_at = max_length - len(suffix)
    if len(message) <= max_length:
        return message, entities

    message = message[:cut_at] + suffix
    new_entities = []
    for entity in entities:
        # ">=" rather than ">": an entity starting exactly at the cut point has
        # no text left and would otherwise be kept with a length of zero.
        if entity.offset >= cut_at:
            continue
        if entity.offset + entity.length > cut_at:
            entity.length = cut_at - entity.offset
        new_entities.append(entity)
    new_entities.append(MessageEntityItalic(cut_at, len(suffix)))
    return message, new_entities
def matrix_to_telegram(html: str) -> Tuple[str, List[TypeMessageEntity]]:
    """Convert Matrix HTML into Telegram message text and entities.

    A leading ``!command`` is wrapped in a ``<command>`` tag, a leading
    ``\\!command`` loses its backslash, tabs become four spaces and, when
    enabled, plaintext displaynames are turned into mention links before the
    HTML is fed to the parser. The result is trimmed and cut to length.

    :param html: The HTML body of the Matrix message.
    :return: The Telegram text and the entities formatting it.
             NOTE(review): when the conversion fails, the exception is only
             logged and the function falls through, returning ``None`` rather
             than a tuple; confirm callers handle that.
    """
    try:
        parser = MatrixParser()
        html = command_regex.sub(r"<command>\1</command>", html)
        html = html.replace("\t", " " * 4)
        html = not_command_regex.sub(r"\1", html)
        if should_bridge_plaintext_highlights:
            html = plain_mention_regex.sub(plain_mention_to_html, html)
        parser.feed(add_surrogates(html))
        # feed() may keep the end of the input buffered while waiting for more
        # data; close() forces it to be processed.
        parser.close()
        message_text = remove_surrogates(parser.text.strip())
        message_entities = parser.entities
        message_text, message_entities = cut_long_message(message_text, message_entities)
        return message_text, message_entities
    except Exception:
        log.exception("Failed to convert Matrix format:\nhtml=%s", html)
def matrix_reply_to_telegram(content: dict, tg_space: int, room_id: Optional[str] = None
                             ) -> Optional[int]:
    """Find the Telegram message ID that a Matrix reply points at.

    As a side effect the reply fallback is trimmed from ``content["body"]``
    and, for HTML messages, from ``content["formatted_body"]``.

    :param content: The content of the Matrix event; modified in place.
    :param tg_space: The ``tg_space`` value the stored message must have.
    :param room_id: The Matrix room ID. When empty, the ``room_id`` field of
                    the reply relation is used instead.
    :return: The ``tgid`` of the stored message that was replied to, or
             ``None`` if the event is not a reply, a required key is missing
             or no matching message is stored.
    """
    try:
        reply_to = content["m.relates_to"]["m.in_reply_to"]
        room_id = room_id or reply_to["room_id"]
        event_id = reply_to["event_id"]

        # The formatted body is optional, so missing keys are fine here.
        try:
            is_html = content["format"] == "org.matrix.custom.html"
            if is_html:
                content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"])
        except KeyError:
            pass
        content["body"] = trim_reply_fallback_text(content["body"])

        query = DBMessage.query.filter(DBMessage.mxid == event_id,
                                       DBMessage.tg_space == tg_space,
                                       DBMessage.mx_room == room_id)
        db_message = query.one_or_none()
        return db_message.tgid if db_message else None
    except KeyError:
        return None
def matrix_text_to_telegram(text: str) -> Tuple[str, List[TypeMessageEntity]]:
    """Convert a plaintext Matrix message into Telegram text and entities.

    A leading ``!command`` becomes ``/command``, a leading ``\\!command``
    loses its backslash and tabs become four spaces. Plaintext displaynames
    are replaced with mentions only when that feature is enabled.

    :param text: The plaintext body of the Matrix message.
    :return: The converted text and the mention entities created for it.
    """
    with_commands = command_regex.sub(r"/\1", text)
    without_tabs = with_commands.replace("\t", " " * 4)
    text = not_command_regex.sub(r"\1", without_tabs)

    entities = []
    if should_bridge_plaintext_highlights:
        # The replacer appends to ``entities`` while the substitution runs.
        entities, pmr_replacer = plain_mention_to_text()
        text = plain_mention_regex.sub(pmr_replacer, text)
    return text, entities
def plain_mention_to_text() -> Tuple[List[TypeMessageEntity], Callable[[Match], str]]:
    """Create a regex replacer that turns plaintext displaynames into mentions.

    The replacer is meant for a single ``plain_mention_regex.sub`` call: it
    keeps track of how much the earlier replacements changed the text length
    so that the offsets of later entities refer to the rewritten text.

    :return: A list and a replacer function. The list is filled with one
             mention entity per replaced displayname as the replacer runs.
    """
    entities = []  # type: List[TypeMessageEntity]
    # Length of the rewritten text minus length of the original text so far.
    shift = 0

    def replacer(match: Match) -> str:
        nonlocal shift
        puppet = pu.Puppet.find_by_displayname(match.group(2))
        if not puppet:
            return "".join(match.groups())

        # Group 1 (the leading whitespace) is kept in front of the mention and
        # is not part of the entity, so the entity starts where group 2 did.
        offset = match.start(2) + shift
        if puppet.username:
            text = f"@{puppet.username}"
            entity = MessageEntityMention(offset, len(text))
        else:
            text = puppet.displayname
            entity = MessageEntityMentionName(offset, len(text), user_id=puppet.tgid)
        entities.append(entity)
        shift += len(text) - len(match.group(2))
        return match.group(1) + text

    return entities, replacer
def init_mx(context: "Context") -> None:
    """Initialize the module-level plaintext mention settings from the config.

    Sets ``plain_mention_regex`` to a pattern matching the configured
    displayname template at the start of the text or after whitespace, and
    ``should_bridge_plaintext_highlights`` to the configured flag.

    :param context: Object whose ``config`` attribute supports ``get()`` with a
                    default and item access with dotted keys.
    """
    global plain_mention_regex, should_bridge_plaintext_highlights
    config = context.config
    dn_template = config.get("bridge.displayname_template", "{displayname} (Telegram)")
    # Escape the whole template, then swap the escaped placeholder for a
    # pattern matching the actual name.
    dn_pattern = re.escape(dn_template).replace(re.escape("{displayname}"), "[^>]+")
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    plain_mention_regex = re.compile(r"(\s|^)(" + dn_pattern + ")")
    should_bridge_plaintext_highlights = config["bridge.plaintext_highlights"] or False
@@ -0,0 +1,337 @@
# -*- coding: future_fstrings -*-
# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2018 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Optional, List, Tuple, Union, Callable
from lxml import html
from telethon.tl.types import (MessageEntityMention as Mention,
MessageEntityMentionName as MentionName, MessageEntityEmail as Email,
MessageEntityUrl as URL, MessageEntityTextUrl as TextURL,
MessageEntityBold as Bold, MessageEntityItalic as Italic,
MessageEntityCode as Code, MessageEntityPre as Pre,
MessageEntityBotCommand as Command, TypeMessageEntity,
InputMessageEntityMentionName as InputMentionName)
from ... import user as u, puppet as pu, portal as po
from ..util import html_to_unicode
from .parser_common import MatrixParserCommon, ParsedMessage
def parse_html(html: str) -> ParsedMessage:
    """Convert Matrix HTML into Telegram text and entities with the lxml-based parser.

    :param html: The HTML to convert.
    :return: Whatever ``MatrixParser.parse`` produces for the input.
    """
    parsed = MatrixParser.parse(html)
    return parsed
class Entity:
    """Helpers for copying Telegram message entities and changing their position."""

    @staticmethod
    def copy(entity: TypeMessageEntity) -> TypeMessageEntity:
        """Create a new entity of the same class with the same field values.

        Besides ``offset`` and ``length``, the type-specific field of Pre
        (``language``), TextURL (``url``) and MentionName / InputMentionName
        (``user_id``) entities is carried over.
        """
        extra = {}
        if isinstance(entity, Pre):
            extra["language"] = entity.language
        elif isinstance(entity, TextURL):
            extra["url"] = entity.url
        elif isinstance(entity, (MentionName, InputMentionName)):
            extra["user_id"] = entity.user_id
        entity_class = entity.__class__
        return entity_class(offset=entity.offset, length=entity.length, **extra)

    @classmethod
    def adjust(cls, entity: Union[TypeMessageEntity, List[TypeMessageEntity]],
               func: Callable[[TypeMessageEntity], None]
               ) -> Union[TypeMessageEntity, List[TypeMessageEntity]]:
        """Apply ``func`` to a copy of an entity, or of every entity in a list.

        The originals are left untouched. If ``func`` moves an entity before
        the start of the text, the entity is shortened by the overshoot and
        moved to offset 0.

        :param entity: A single entity or a list of entities.
        :param func: Function that modifies the entity it is given in place.
        :return: The adjusted copy, or a list of adjusted copies.
        """
        if isinstance(entity, list):
            return [Entity.adjust(item, func) for item in entity]

        adjusted = cls.copy(entity)
        func(adjusted)
        if adjusted.offset < 0:
            adjusted.length += adjusted.offset
            adjusted.offset = 0
        return adjusted
def offset_diff(amount: int):
    """Build a function that moves an entity's offset by ``amount`` in place."""
    def shift(entity: "TypeMessageEntity"):
        entity.offset = entity.offset + amount

    return shift
def offset_length_multiply(amount: int):
    """Build a function that multiplies an entity's offset and length by ``amount`` in place."""
    def scale(entity: "TypeMessageEntity"):
        entity.offset = entity.offset * amount
        entity.length = entity.length * amount

    return scale
class TelegramMessage:
    """A piece of message text together with the Telegram entities formatting it.

    Apart from ``concat``, ``split`` and ``join``, the methods modify the
    instance and return it so that calls can be chained.
    """

    def __init__(self, text: str = "", entities: Optional[List[TypeMessageEntity]] = None):
        self.text = text  # type: str
        self.entities = entities or []  # type: List[TypeMessageEntity]

    def offset_entities(self, offset: int) -> "TelegramMessage":
        """Move every entity by ``offset`` and clip it to the current text.

        Entities that end up before the start of the text are shortened by the
        part that fell off; entities left without any text, or starting after
        the end of the text, are dropped.
        """
        text_length = len(self.text)
        shifted = []  # type: List[TypeMessageEntity]
        for entity in self.entities:
            if not entity:
                continue
            entity = Entity.copy(entity)
            entity.offset += offset
            if entity.offset < 0:
                # The text before the new start is gone, so is that part of the entity.
                entity.length += entity.offset
                entity.offset = 0
                if entity.length <= 0:
                    continue
            if entity.offset > text_length:
                continue
            if entity.offset + entity.length > text_length:
                entity.length = text_length - entity.offset
            shifted.append(entity)
        self.entities = shifted
        return self

    def append(self, *args: Union[str, "TelegramMessage"]) -> "TelegramMessage":
        """Add the given strings or messages to the end, in order."""
        for msg in args:
            if isinstance(msg, str):
                msg = TelegramMessage(text=msg)
            self.entities += Entity.adjust(msg.entities, offset_diff(len(self.text)))
            self.text += msg.text
        return self

    def prepend(self, *args: Union[str, "TelegramMessage"]) -> "TelegramMessage":
        """Add the given strings or messages to the start.

        Each argument is put in front of the current text in turn, so the last
        argument ends up first.
        """
        for msg in args:
            if isinstance(msg, str):
                msg = TelegramMessage(text=msg)
            # The existing entities move by the length of the text that is
            # inserted in front of them.
            self.entities = msg.entities + Entity.adjust(self.entities,
                                                         offset_diff(len(msg.text)))
            self.text = msg.text + self.text
        return self

    def format(self, entity_type: type(TypeMessageEntity), offset: int = None, length: int = None,
               **kwargs) -> "TelegramMessage":
        """Add an entity of the given type.

        :param entity_type: The entity class to instantiate.
        :param offset: Start of the entity; the start of the text by default.
        :param length: Length of the entity; the length of the whole text by default.
        :param kwargs: Extra arguments for the entity constructor.
        """
        self.entities.append(entity_type(offset=offset or 0,
                                         length=length if length is not None else len(self.text),
                                         **kwargs))
        return self

    def concat(self, *args: Union[str, "TelegramMessage"]) -> "TelegramMessage":
        """Return a new message made of this message followed by the arguments."""
        return TelegramMessage().append(self, *args)

    def trim(self) -> "TelegramMessage":
        """Strip whitespace from both ends of the text and realign the entities."""
        orig_len = len(self.text)
        self.text = self.text.lstrip()
        diff = orig_len - len(self.text)
        self.text = self.text.rstrip()
        self.offset_entities(-diff)
        return self

    def split(self, separator, max_items: int = 0) -> List["TelegramMessage"]:
        """Split the message at ``separator`` into new messages.

        Only entities lying entirely within one part are carried over to that
        part; entities spanning a separator are not included in any part.

        :param separator: The string to split the text at.
        :param max_items: Maximum number of parts; 0 means no limit.
        """
        text_parts = self.text.split(separator, max_items - 1)
        output = []  # type: List[TelegramMessage]

        offset = 0
        for part in text_parts:
            msg = TelegramMessage(part)
            for entity in self.entities:
                start_in_range = len(part) > entity.offset - offset >= 0
                end_in_range = len(part) >= entity.offset - offset + entity.length > 0
                if start_in_range and end_in_range:
                    msg.entities.append(Entity.adjust(entity, offset_diff(-offset)))
            output.append(msg)

            offset += len(part)
            offset += len(separator)

        return output

    @staticmethod
    def join(items: List[Union[str, "TelegramMessage"]], separator: str = " ") -> "TelegramMessage":
        """Combine strings and messages into a new message with ``separator`` between them."""
        main = TelegramMessage()
        for index, msg in enumerate(items):
            if isinstance(msg, str):
                msg = TelegramMessage(text=msg)
            # The separator is added in front of every item but the first.
            # Slicing a trailing separator off afterwards would wipe the whole
            # text when the separator is empty, as text[:-0] is text[:0].
            if index > 0:
                main.text += separator
            main.entities += Entity.adjust(msg.entities, offset_diff(len(main.text)))
            main.text += msg.text
        return main
class MatrixParser(MatrixParserCommon):
    """Tree-based (lxml) converter from Matrix HTML to Telegram text and entities.

    All methods are class or static methods; ``parse`` is the entry point.
    The ``strip_linebreaks`` argument passed around says whether literal
    newlines are removed from text nodes. It is switched off for the contents
    of ``<pre>`` and ``<code>``.
    """

    @classmethod
    def list_to_tmessage(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert an ``<ol>`` or ``<ul>`` element, one line per ``<li>`` child.

        Items of ordered lists are numbered starting from the ``start``
        attribute (1 if missing or not a number). Continuation lines of an
        item are indented.
        """
        ordered = node.tag == "ol"
        tagged_children = cls.node_to_tagged_tmessages(node, strip_linebreaks)
        counter = 1
        indent_length = 0
        if ordered:
            try:
                counter = int(node.attrib.get("start", "1"))
            except ValueError:
                counter = 1

            longest_index = counter - 1 + len(tagged_children)
            indent_length = len(str(longest_index))
        indent = (indent_length + 4) * " "
        children = []  # type: List[TelegramMessage]
        for child, tag in tagged_children:
            # Anything that is not a list item (e.g. text between items) is skipped.
            if tag != "li":
                continue

            if ordered:
                prefix = f"{counter}. "
                counter += 1
            else:
                # NOTE(review): unordered items get an empty prefix; confirm
                # that no bullet character is intended here.
                prefix = ""
            child = child.prepend(prefix)
            parts = child.split("\n")
            parts = parts[:1] + [part.prepend(indent) for part in parts[1:]]
            child = TelegramMessage.join(parts, "\n")
            children.append(child)
        return TelegramMessage.join(children, "\n")

    @classmethod
    def blockquote_to_tmessage(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert a ``<blockquote>``, prefixing every line with ``"> "``."""
        msg = cls.tag_aware_parse_node(node, strip_linebreaks)
        children = msg.trim().split("\n")
        children = [child.prepend("> ") for child in children]
        return TelegramMessage.join(children, "\n")

    @classmethod
    def header_to_tmessage(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert ``<h1>``..``<h6>``, prefixing the text with one ``#`` per level."""
        children = cls.node_to_tmessages(node, strip_linebreaks)
        length = int(node.tag[1])
        prefix = "#" * length + " "
        return TelegramMessage.join(children, "").prepend(prefix)

    @classmethod
    def basic_format_to_tmessage(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert bold, italic, command, strikethrough and underline tags.

        Bold, italic and command become entities covering the whole content.
        Strikethrough and underline are applied to the text itself through
        ``html_to_unicode`` with a combining character.
        """
        msg = cls.tag_aware_parse_node(node, strip_linebreaks)
        if node.tag in ("b", "strong"):
            msg.format(Bold)
        elif node.tag in ("i", "em"):
            msg.format(Italic)
        elif node.tag == "command":
            msg.format(Command)
        elif node.tag in ("s", "del"):
            msg.text = html_to_unicode(msg.text, "\u0336")
        elif node.tag in ("u", "ins"):
            msg.text = html_to_unicode(msg.text, "\u0332")

        if node.tag in ("s", "del", "u", "ins"):
            # Presumably html_to_unicode adds the combining character after
            # every character and thus doubles the text length; the entities
            # are scaled to match. TODO confirm against html_to_unicode.
            msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2))
        return msg

    @classmethod
    def link_to_tstring(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert an ``<a>`` element.

        ``mailto:`` links become email entities, matrix.to user links become
        mentions of the matching Telegram user, matrix.to room links become
        mentions of the portal's username, and everything else becomes a URL
        or text URL entity. Links without ``href`` are returned as plain
        content.
        """
        msg = cls.tag_aware_parse_node(node, strip_linebreaks)
        href = node.attrib.get("href", "")
        if not href:
            return msg

        if href.startswith("mailto:"):
            return TelegramMessage(href[len("mailto:"):]).format(Email)

        mention = cls.mention_regex.match(href)
        if mention:
            mxid = mention.group(1)
            user = (pu.Puppet.get_by_mxid(mxid)
                    or u.User.get_by_mxid(mxid, create=False))
            if not user:
                return msg
            if user.username:
                return TelegramMessage(f"@{user.username}").format(Mention)
            elif user.tgid:
                return TelegramMessage(user.displayname or msg.text).format(MentionName,
                                                                            user_id=user.tgid)
            return msg

        room = cls.room_regex.match(href)
        if room:
            username = po.Portal.get_username_from_mx_alias(room.group(1))
            portal = po.Portal.find_by_username(username)
            if portal and portal.username:
                return TelegramMessage(f"@{portal.username}").format(Mention)

        # A link whose text is its own address needs no separate URL.
        return (msg.format(URL)
                if msg.text == href
                else msg.format(TextURL, url=href))

    @classmethod
    def node_to_tmessage(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert a single element by dispatching on its tag name.

        Tags without special handling are converted like plain containers.
        """
        if node.tag == "blockquote":
            return cls.blockquote_to_tmessage(node, strip_linebreaks)
        elif node.tag in ("ol", "ul"):
            return cls.list_to_tmessage(node, strip_linebreaks)
        elif node.tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
            return cls.header_to_tmessage(node, strip_linebreaks)
        elif node.tag == "br":
            return TelegramMessage("\n")
        elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"):
            return cls.basic_format_to_tmessage(node, strip_linebreaks)
        elif node.tag == "a":
            return cls.link_to_tstring(node, strip_linebreaks)
        elif node.tag == "p":
            return cls.tag_aware_parse_node(node, strip_linebreaks).append("\n")
        elif node.tag == "pre":
            lang = ""
            try:
                if node[0].tag == "code":
                    # Switch to the <code> child before looking at its class:
                    # if the lookup came first, a <code> without a class would
                    # raise before the switch and then be parsed as a child of
                    # <pre>, adding a Code entity inside the Pre entity.
                    node = node[0]
                    lang = node.attrib["class"][len("language-"):]
            except (IndexError, KeyError):
                pass
            return cls.parse_node(node, strip_linebreaks=False).format(Pre, language=lang)
        elif node.tag == "code":
            return cls.parse_node(node, strip_linebreaks=False).format(Code)
        return cls.tag_aware_parse_node(node, strip_linebreaks)

    @staticmethod
    def text_to_tmessage(text: str, strip_linebreaks: bool = True) -> TelegramMessage:
        """Wrap plain text in a message, removing newlines if ``strip_linebreaks`` is set."""
        if strip_linebreaks:
            text = text.replace("\n", "")
        return TelegramMessage(text)

    @classmethod
    def node_to_tagged_tmessages(cls, node: html.HtmlElement, strip_linebreaks: bool = True
                                 ) -> List[Tuple[TelegramMessage, str]]:
        """Convert the contents of an element into (message, tag) pairs.

        Text directly inside the element and text following a child element
        is tagged ``"text"``; converted child elements are tagged with their
        own tag name. The pairs are in document order.
        """
        output = []
        if node.text:
            output.append((cls.text_to_tmessage(node.text, strip_linebreaks), "text"))
        for child in node:
            output.append((cls.node_to_tmessage(child, strip_linebreaks), child.tag))
            if child.tail:
                output.append((cls.text_to_tmessage(child.tail, strip_linebreaks), "text"))
        return output

    @classmethod
    def node_to_tmessages(cls, node: html.HtmlElement, strip_linebreaks) -> List[TelegramMessage]:
        """Like ``node_to_tagged_tmessages``, but without the tags."""
        return [msg for (msg, tag) in cls.node_to_tagged_tmessages(node, strip_linebreaks)]

    @classmethod
    def tag_aware_parse_node(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert the contents of an element, putting block-level children on their own lines.

        The combined result is trimmed.
        """
        msgs = cls.node_to_tagged_tmessages(node, strip_linebreaks)
        output = TelegramMessage()
        for msg, tag in msgs:
            if tag in cls.block_tags:
                msg = msg.append("\n").prepend("\n")
            output = output.append(msg)
        return output.trim()

    @classmethod
    def parse_node(cls, node: html.HtmlElement, strip_linebreaks) -> TelegramMessage:
        """Convert the contents of an element and join the pieces with the default separator."""
        return TelegramMessage.join(cls.node_to_tmessages(node, strip_linebreaks))

    @classmethod
    def parse(cls, data: str) -> ParsedMessage:
        """Convert an HTML string into Telegram text and entities.

        :param data: The HTML to convert; it is wrapped in an ``<html>`` element for parsing.
        :return: The text and the entities of the converted message.
        """
        document = html.fromstring(f"<html>{data}</html>")
        msg = cls.parse_node(document, strip_linebreaks=True)
        return msg.text, msg.entities
+2 -2
View File
@@ -789,9 +789,9 @@ class Portal:
def _matrix_event_to_entities(event: dict) -> Tuple[str, Optional[List[TypeMessageEntity]]]:
    """Convert the content of a Matrix message event to Telegram text and entities.

    HTML-formatted events are converted from ``formatted_body``, all others
    from the plaintext ``body``. A missing body is treated as an empty string.

    :param event: The event content; ``format``, ``formatted_body`` and
                  ``body`` are read from it.
    :return: The Telegram text and entities, or ``(None, None)`` if a
             ``KeyError`` is raised during the conversion.
    """
    # NOTE(review): only KeyError is handled here; confirm whether other
    # errors raised by the formatter should be handled as well.
    try:
        # Each formatter is called exactly once, with the .get() default
        # guarding against a missing body.
        if event.get("format", None) == "org.matrix.custom.html":
            message, entities = formatter.matrix_to_telegram(event.get("formatted_body", ""))
        else:
            message, entities = formatter.matrix_text_to_telegram(event.get("body", ""))
    except KeyError:
        message, entities = None, None
    return message, entities
+2 -1
View File
@@ -4,11 +4,12 @@ import mautrix_telegram
# Optional dependency groups, keyed by the name of the extra.
extras = {
    "highlight_edits": ["lxml>=4.1.1,<5"],
    "better_formatter": ["lxml>=4.1.1,<5"],
    "fast_crypto": ["cryptg>=0.1,<0.2"],
    "webp_convert": ["Pillow>=5.0.0,<6"],
    "hq_thumbnails": ["moviepy>=0.2,<0.3"],
}
# "all" installs every optional dependency. A set removes requirements shared
# by several extras and sorting keeps the resulting list in a stable order.
extras["all"] = sorted({dep for deps in extras.values() for dep in deps})
setuptools.setup(
name="mautrix-telegram",