Fix and refactor Matrix->Telegram formatter
This commit is contained in:
@@ -151,17 +151,22 @@ class MatrixParser(HTMLParser):
|
|||||||
for entity in self._building_entities.values():
|
for entity in self._building_entities.values():
|
||||||
entity.length += 1
|
entity.length += 1
|
||||||
|
|
||||||
def handle_data(self, text):
|
def _handle_special_previous_tags(self, text):
|
||||||
text = unescape(text)
|
if "pre" not in self._open_tags and "code" not in self._open_tags:
|
||||||
|
text = text.replace("\n", "")
|
||||||
|
else:
|
||||||
|
text = text.strip()
|
||||||
|
|
||||||
previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ""
|
previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ""
|
||||||
extra_offset = 0
|
|
||||||
if previous_tag == "a":
|
if previous_tag == "a":
|
||||||
url = self._open_tags_meta[0]
|
url = self._open_tags_meta[0]
|
||||||
if url:
|
if url:
|
||||||
text = url
|
text = url
|
||||||
elif previous_tag == "command":
|
elif previous_tag == "command":
|
||||||
text = f"/{text}"
|
text = f"/{text}"
|
||||||
|
return text
|
||||||
|
|
||||||
|
def _html_to_unicode(self, text):
|
||||||
strikethrough, underline = "del" in self._open_tags, "u" in self._open_tags
|
strikethrough, underline = "del" in self._open_tags, "u" in self._open_tags
|
||||||
if strikethrough and underline:
|
if strikethrough and underline:
|
||||||
text = html_to_unicode(text, "\u0336\u0332")
|
text = html_to_unicode(text, "\u0336\u0332")
|
||||||
@@ -169,7 +174,10 @@ class MatrixParser(HTMLParser):
|
|||||||
text = html_to_unicode(text, "\u0336")
|
text = html_to_unicode(text, "\u0336")
|
||||||
elif underline:
|
elif underline:
|
||||||
text = html_to_unicode(text, "\u0332")
|
text = html_to_unicode(text, "\u0332")
|
||||||
|
return text
|
||||||
|
|
||||||
|
def _handle_tags_for_data(self, text):
|
||||||
|
extra_offset = 0
|
||||||
list_entry_handled_once = False
|
list_entry_handled_once = False
|
||||||
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
|
# In order to maintain order of things like blockquotes in lists or lists in blockquotes,
|
||||||
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
|
# we can't just have ifs/elses and we need to actually loop through the open tags in order.
|
||||||
@@ -197,10 +205,19 @@ class MatrixParser(HTMLParser):
|
|||||||
text = indent + prefix + text
|
text = indent + prefix + text
|
||||||
self._list_entry_is_new = False
|
self._list_entry_is_new = False
|
||||||
list_entry_handled_once = True
|
list_entry_handled_once = True
|
||||||
|
return text, extra_offset
|
||||||
|
|
||||||
|
def _extend_entities_in_construction(self, text, extra_offset):
|
||||||
for tag, entity in self._building_entities.items():
|
for tag, entity in self._building_entities.items():
|
||||||
entity.length += len(text) - extra_offset
|
entity.length += len(text) - extra_offset
|
||||||
entity.offset += extra_offset
|
entity.offset += extra_offset
|
||||||
|
|
||||||
|
def handle_data(self, text):
|
||||||
|
text = unescape(text)
|
||||||
|
text = self._handle_special_previous_tags(text)
|
||||||
|
text = self._html_to_unicode(text)
|
||||||
|
text, extra_offset = self._handle_tags_for_data(text)
|
||||||
|
self._extend_entities_in_construction(text, extra_offset)
|
||||||
self._line_is_new = False
|
self._line_is_new = False
|
||||||
self.text += text
|
self.text += text
|
||||||
|
|
||||||
@@ -223,6 +240,52 @@ command_regex = re.compile("(\s|^)!([A-Za-z0-9@]+)")
|
|||||||
plain_mention_regex = None
|
plain_mention_regex = None
|
||||||
|
|
||||||
|
|
||||||
|
def plain_mention_to_html(match):
|
||||||
|
puppet = pu.Puppet.find_by_displayname(match.group(2))
|
||||||
|
if puppet:
|
||||||
|
return (f"{match.group(1)}"
|
||||||
|
f"<a href='https://matrix.to/#/{puppet.mxid}'>"
|
||||||
|
f"{puppet.displayname}"
|
||||||
|
"</a>")
|
||||||
|
return "".join(match.groups())
|
||||||
|
|
||||||
|
|
||||||
|
def matrix_to_telegram(html):
|
||||||
|
try:
|
||||||
|
parser = MatrixParser()
|
||||||
|
html = command_regex.sub(r"\1<command>\2</command>", html)
|
||||||
|
if should_bridge_plaintext_highlights:
|
||||||
|
html = plain_mention_regex.sub(plain_mention_to_html, html)
|
||||||
|
parser.feed(add_surrogates(html))
|
||||||
|
print([str(e) for e in parser.entities])
|
||||||
|
return remove_surrogates(parser.text.strip()), parser.entities
|
||||||
|
except Exception:
|
||||||
|
log.exception("Failed to convert Matrix format:\nhtml=%s", html)
|
||||||
|
|
||||||
|
|
||||||
|
def matrix_reply_to_telegram(content, tg_space, room_id=None):
|
||||||
|
try:
|
||||||
|
reply = content["m.relates_to"]["m.in_reply_to"]
|
||||||
|
room_id = room_id or reply["room_id"]
|
||||||
|
event_id = reply["event_id"]
|
||||||
|
|
||||||
|
try:
|
||||||
|
if content["format"] == "org.matrix.custom.html":
|
||||||
|
content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"])
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
content["body"] = trim_reply_fallback_text(content["body"])
|
||||||
|
|
||||||
|
message = DBMessage.query.filter(DBMessage.mxid == event_id,
|
||||||
|
DBMessage.tg_space == tg_space,
|
||||||
|
DBMessage.mx_room == room_id).one_or_none()
|
||||||
|
if message:
|
||||||
|
return message.tgid
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def matrix_text_to_telegram(text):
|
def matrix_text_to_telegram(text):
|
||||||
text = command_regex.sub(r"\1/\2", text)
|
text = command_regex.sub(r"\1/\2", text)
|
||||||
if should_bridge_plaintext_highlights:
|
if should_bridge_plaintext_highlights:
|
||||||
@@ -255,52 +318,6 @@ def plain_mention_to_text():
|
|||||||
return entities, replacer
|
return entities, replacer
|
||||||
|
|
||||||
|
|
||||||
def plain_mention_to_html(match):
|
|
||||||
puppet = pu.Puppet.find_by_displayname(match.group(2))
|
|
||||||
if puppet:
|
|
||||||
return (f"{match.group(1)}"
|
|
||||||
f"<a href='https://matrix.to/#/{puppet.mxid}'>"
|
|
||||||
f"{puppet.displayname}"
|
|
||||||
"</a>")
|
|
||||||
return "".join(match.groups())
|
|
||||||
|
|
||||||
|
|
||||||
def matrix_to_telegram(html):
|
|
||||||
try:
|
|
||||||
parser = MatrixParser()
|
|
||||||
html = html.replace("\n", "")
|
|
||||||
html = command_regex.sub(r"\1<command>\2</command>", html)
|
|
||||||
if should_bridge_plaintext_highlights:
|
|
||||||
html = plain_mention_regex.sub(plain_mention_to_html, html)
|
|
||||||
parser.feed(add_surrogates(html))
|
|
||||||
return remove_surrogates(parser.text.strip()), parser.entities
|
|
||||||
except Exception:
|
|
||||||
log.exception("Failed to convert Matrix format:\nhtml=%s", html)
|
|
||||||
|
|
||||||
|
|
||||||
def matrix_reply_to_telegram(content, tg_space, room_id=None):
|
|
||||||
try:
|
|
||||||
reply = content["m.relates_to"]["m.in_reply_to"]
|
|
||||||
room_id = room_id or reply["room_id"]
|
|
||||||
event_id = reply["event_id"]
|
|
||||||
|
|
||||||
try:
|
|
||||||
if content["format"] == "org.matrix.custom.html":
|
|
||||||
content["formatted_body"] = trim_reply_fallback_html(content["formatted_body"])
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
content["body"] = trim_reply_fallback_text(content["body"])
|
|
||||||
|
|
||||||
message = DBMessage.query.filter(DBMessage.mxid == event_id,
|
|
||||||
DBMessage.tg_space == tg_space,
|
|
||||||
DBMessage.mx_room == room_id).one_or_none()
|
|
||||||
if message:
|
|
||||||
return message.tgid
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def init_mx(context):
|
def init_mx(context):
|
||||||
global plain_mention_regex, should_bridge_plaintext_highlights
|
global plain_mention_regex, should_bridge_plaintext_highlights
|
||||||
config = context.config
|
config = context.config
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ async def _add_reply_header(source, text, html, evt, relates_to, main_intent, is
|
|||||||
|
|
||||||
r_keyword = "In reply to" if not is_edit else "Edit to"
|
r_keyword = "In reply to" if not is_edit else "Edit to"
|
||||||
r_msg_link = f"<a href='https://matrix.to/#/{msg.mx_room}/{msg.mxid}'>{r_keyword}</a>"
|
r_msg_link = f"<a href='https://matrix.to/#/{msg.mx_room}/{msg.mxid}'>{r_keyword}</a>"
|
||||||
html = (f"<blockquote data-mx-reply>{r_msg_link} {r_sender_link} {r_html_body}</blockquote>"
|
html = (f"<blockquote data-mx-reply>{r_msg_link} {r_sender_link}\n{r_html_body}</blockquote>"
|
||||||
+ (html or escape(text)))
|
+ (html or escape(text)))
|
||||||
|
|
||||||
lines = r_text_body.strip().split("\n")
|
lines = r_text_body.strip().split("\n")
|
||||||
|
|||||||
@@ -594,7 +594,7 @@ class Portal:
|
|||||||
entity.user_id = await client.get_input_entity(entity.user_id.user_id)
|
entity.user_id = await client.get_input_entity(entity.user_id.user_id)
|
||||||
else:
|
else:
|
||||||
message, entities = formatter.matrix_text_to_telegram(message["body"])
|
message, entities = formatter.matrix_text_to_telegram(message["body"])
|
||||||
return await client.send_message(self.peer, message, reply_to=reply_to)
|
return await client.send_message(self.peer, message, entities=entities, reply_to=reply_to)
|
||||||
|
|
||||||
async def _handle_matrix_file(self, client, message, reply_to):
|
async def _handle_matrix_file(self, client, message, reply_to):
|
||||||
file = await self.main_intent.download_file(message["url"])
|
file = await self.main_intent.download_file(message["url"])
|
||||||
|
|||||||
Reference in New Issue
Block a user