Merge branch 'native-strike-underline'
This commit is contained in:
@@ -18,15 +18,15 @@ from typing import List, Tuple, Pattern
|
||||
import re
|
||||
|
||||
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
|
||||
MessageEntityMentionName as MentionName, MessageEntityEmail as Email,
|
||||
MessageEntityUrl as URL, MessageEntityTextUrl as TextURL,
|
||||
MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
|
||||
MessageEntityEmail as Email, MessageEntityTextUrl as TextURL,
|
||||
MessageEntityBold as Bold, MessageEntityItalic as Italic,
|
||||
MessageEntityCode as Code, MessageEntityPre as Pre,
|
||||
TypeMessageEntity)
|
||||
MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
|
||||
MessageEntityBlockquote as Blockquote, TypeMessageEntity)
|
||||
|
||||
from ... import user as u, puppet as pu, portal as po
|
||||
from ...types import MatrixUserID
|
||||
from ..util import html_to_unicode
|
||||
from .telegram_message import TelegramMessage, Entity, offset_length_multiply
|
||||
|
||||
from .html_reader import HTMLNode, read_html
|
||||
@@ -101,13 +101,6 @@ class MatrixParser:
|
||||
children.append(child)
|
||||
return TelegramMessage.join(children, "\n")
|
||||
|
||||
@classmethod
def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
    """Build the message for a blockquote node, quoting each line with "> ".

    The node is parsed with ``cls.tag_aware_parse_node``, the result is trimmed
    (presumably of surrounding whitespace -- confirm against
    ``TelegramMessage.trim``) and split on newlines. Every resulting line gets
    a "> " prefix before the lines are joined back together with newlines.
    """
    parsed = cls.tag_aware_parse_node(node, ctx)
    quoted_lines = []
    for line in parsed.trim().split("\n"):
        # Keep the value returned by prepend(), exactly as the list is built from it.
        quoted_lines.append(line.prepend("> "))
    return TelegramMessage.join(quoted_lines, "\n")
|
||||
|
||||
@classmethod
|
||||
def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
|
||||
children = cls.node_to_tmessages(node, ctx)
|
||||
@@ -122,15 +115,14 @@ class MatrixParser:
|
||||
msg.format(Bold)
|
||||
elif node.tag in ("i", "em"):
|
||||
msg.format(Italic)
|
||||
elif node.tag in ("s", "strike", "del"):
|
||||
msg.format(Strike)
|
||||
elif node.tag in ("u", "ins"):
|
||||
msg.format(Underline)
|
||||
elif node == "blockquote":
|
||||
msg.format(Blockquote)
|
||||
elif node.tag == "command":
|
||||
msg.format(Command)
|
||||
elif node.tag in ("s", "strike", "del"):
|
||||
msg.text = html_to_unicode(msg.text, "\u0336")
|
||||
elif node.tag in ("u", "ins"):
|
||||
msg.text = html_to_unicode(msg.text, "\u0332")
|
||||
|
||||
if node.tag in ("s", "strike", "del", "u", "ins"):
|
||||
msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2))
|
||||
|
||||
return msg
|
||||
|
||||
@@ -171,9 +163,7 @@ class MatrixParser:
|
||||
|
||||
@classmethod
|
||||
def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
|
||||
if node.tag == "blockquote":
|
||||
return cls.blockquote_to_tmessage(node, ctx)
|
||||
elif node.tag == "ol":
|
||||
if node.tag == "ol":
|
||||
return cls.list_to_tmessage(node, ctx)
|
||||
elif node.tag == "ul":
|
||||
return cls.list_to_tmessage(node, ctx.enter_list())
|
||||
@@ -181,7 +171,8 @@ class MatrixParser:
|
||||
return cls.header_to_tmessage(node, ctx)
|
||||
elif node.tag == "br":
|
||||
return TelegramMessage("\n")
|
||||
elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"):
|
||||
elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "blockquote",
|
||||
"command"):
|
||||
return cls.basic_format_to_tmessage(node, ctx)
|
||||
elif node.tag == "a":
|
||||
return cls.link_to_tstring(node, ctx)
|
||||
|
||||
@@ -24,7 +24,8 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M
|
||||
MessageEntityItalic, MessageEntityCode, MessageEntityPre,
|
||||
MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag,
|
||||
MessageEntityPhone, TypeMessageEntity, Message, PeerChannel,
|
||||
MessageFwdHeader, PeerUser)
|
||||
MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader,
|
||||
MessageEntityUnderline, PeerUser)
|
||||
|
||||
from mautrix_appservice import MatrixRequestError
|
||||
from mautrix_appservice.intent_api import IntentAPI
|
||||
@@ -33,7 +34,7 @@ from .. import user as u, puppet as pu, portal as po
|
||||
from ..types import TelegramID
|
||||
from ..db import Message as DBMessage
|
||||
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
|
||||
trim_reply_fallback_text, unicode_to_html)
|
||||
trim_reply_fallback_text)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..abstract_user import AbstractUser
|
||||
@@ -194,9 +195,6 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
|
||||
text += f"\n- {evt.post_author}"
|
||||
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
|
||||
|
||||
html = unicode_to_html(text, html, "\u0336", "del")
|
||||
html = unicode_to_html(text, html, "\u0332", "u")
|
||||
|
||||
if html:
|
||||
html = html.replace("\n", "<br/>")
|
||||
|
||||
@@ -214,29 +212,43 @@ def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEnti
|
||||
return "[failed conversion in _telegram_entities_to_matrix]"
|
||||
|
||||
|
||||
def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str:
|
||||
def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity],
|
||||
offset: int = 0, length: int = None) -> str:
|
||||
if not entities:
|
||||
return text
|
||||
return escape(text)
|
||||
if length is None:
|
||||
length = len(text)
|
||||
html = []
|
||||
last_offset = 0
|
||||
for entity in entities:
|
||||
if entity.offset > last_offset:
|
||||
html.append(escape(text[last_offset:entity.offset]))
|
||||
elif entity.offset < last_offset:
|
||||
for i, entity in enumerate(entities):
|
||||
if entity.offset > offset + length:
|
||||
break
|
||||
relative_offset = entity.offset - offset
|
||||
if relative_offset > last_offset:
|
||||
html.append(escape(text[last_offset:relative_offset]))
|
||||
elif relative_offset < last_offset:
|
||||
continue
|
||||
|
||||
skip_entity = False
|
||||
entity_text = escape(text[entity.offset:entity.offset + entity.length])
|
||||
entity_text = _telegram_entities_to_matrix(
|
||||
text=text[relative_offset:relative_offset + entity.length],
|
||||
entities=entities[i + 1:], offset=entity.offset, length=entity.length)
|
||||
entity_type = type(entity)
|
||||
|
||||
if entity_type == MessageEntityBold:
|
||||
html.append(f"<strong>{entity_text}</strong>")
|
||||
elif entity_type == MessageEntityItalic:
|
||||
html.append(f"<em>{entity_text}</em>")
|
||||
elif entity_type == MessageEntityUnderline:
|
||||
html.append(f"<u>{entity_text}</u>")
|
||||
elif entity_type == MessageEntityStrike:
|
||||
html.append(f"<del>{entity_text}</del>")
|
||||
elif entity_type == MessageEntityBlockquote:
|
||||
html.append(f"<blockquote>{entity_text}</blockquote>")
|
||||
elif entity_type == MessageEntityCode:
|
||||
html.append(("<pre><code>{entity_text}</code></pre>"
|
||||
if "\n" in entity_text
|
||||
else "<code>{entity_text}</code>").format(entity_text=entity_text))
|
||||
html.append(f"<pre><code>{entity_text}</code></pre>"
|
||||
if "\n" in entity_text
|
||||
else f"<code>{entity_text}</code>")
|
||||
elif entity_type == MessageEntityPre:
|
||||
skip_entity = _parse_pre(html, entity_text, entity.language)
|
||||
elif entity_type == MessageEntityMention:
|
||||
@@ -254,8 +266,8 @@ def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -
|
||||
html.append(f"<font color='blue'>{entity_text}</font>")
|
||||
else:
|
||||
skip_entity = True
|
||||
last_offset = entity.offset + (0 if skip_entity else entity.length)
|
||||
html.append(text[last_offset:])
|
||||
last_offset = relative_offset + (0 if skip_entity else entity.length)
|
||||
html.append(escape(text[last_offset:]))
|
||||
|
||||
return "".join(html)
|
||||
|
||||
|
||||
@@ -20,38 +20,6 @@ import struct
|
||||
import re
|
||||
|
||||
|
||||
# An HTML character reference ("&amp;", "&#39;", "&#x27;", ...) sitting at the very end of a string.
_TRAILING_ENTITY = re.compile(r"&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);$")


def unicode_to_html(text: str, html: str, ctrl: str, tag: str) -> str:
    """Replace combining-character formatting with an HTML tag.

    Each character immediately followed by ``ctrl`` is considered "marked".
    Consecutive marked characters are wrapped in a single ``<tag>...</tag>``
    pair and the ``ctrl`` characters themselves are dropped.

    :param text: Plain text; only used to decide whether any work is needed
                 and as the source of the HTML when ``html`` is empty.
    :param html: HTML version of the message. When falsy, ``escape(text)`` is
                 used instead.
    :param ctrl: The marker character (e.g. a combining strike/underline).
    :param tag:  Name of the HTML tag to wrap marked runs in.
    :return: ``html`` unchanged when ``ctrl`` does not occur in ``text``,
             otherwise the HTML with marked runs wrapped in ``tag``.
    """
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)

    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # Every chunk except the last one was terminated by a marker, so its final
    # character (or final escaped entity) is marked. The last chunk is whatever
    # follows the final marker and is never marked.
    *marked_chunks, tail = html.split(ctrl)

    parts = []
    in_tag = False
    for chunk in marked_chunks:
        # escape() may have turned the marked character into an entity such as
        # "&amp;"; keep the entity in one piece instead of cutting off its ";".
        entity = _TRAILING_ENTITY.search(chunk)
        marked = entity.group(0) if entity else chunk[-1:]
        plain = chunk[:len(chunk) - len(marked)]

        if plain:
            # Unmarked characters end the current run before they are emitted.
            if in_tag:
                parts.append(tag_end)
                in_tag = False
            parts.append(plain)
        if marked:
            if not in_tag:
                parts.append(tag_start)
                in_tag = True
            parts.append(marked)

    if in_tag:
        parts.append(tag_end)
    parts.append(tail)
    return "".join(parts)
|
||||
|
||||
|
||||
def html_to_unicode(text: str, ctrl: str) -> str:
    """Append the marker ``ctrl`` after every character of ``text``.

    :param text: The text to mark.
    :param ctrl: The marker to insert after each character (e.g. a combining
                 strike-through or underline character).
    :return: ``text`` with ``ctrl`` following each character. The result is
             exactly twice as long as ``text`` for a one-character marker, so
             empty input yields an empty string rather than a stray marker.
    """
    return "".join(f"{char}{ctrl}" for char in text)
|
||||
|
||||
|
||||
# add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon.
|
||||
# Licensed under the MIT license.
|
||||
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py
|
||||
|
||||
@@ -6,7 +6,7 @@ extras = {
|
||||
"fast_crypto": ["cryptg>=0.1,<0.3"],
|
||||
"webp_convert": ["Pillow>=4.3.0,<7"],
|
||||
"hq_thumbnails": ["moviepy>=1.0,<2.0"],
|
||||
"metrics": ["prometheus-client>=0.6.0,<0.7.0"],
|
||||
"metrics": ["prometheus-client>=0.6.0,<0.8.0"],
|
||||
}
|
||||
extras["all"] = list({dep for deps in extras.values() for dep in deps})
|
||||
|
||||
@@ -38,7 +38,7 @@ setuptools.setup(
|
||||
"ruamel.yaml>=0.15.35,<0.16",
|
||||
"future-fstrings>=0.4.2",
|
||||
"python-magic>=0.4.15,<0.5",
|
||||
"telethon>=1.7,<1.9",
|
||||
"telethon>=1.9,<1.10",
|
||||
"telethon-session-sqlalchemy>=0.2.14,<0.3",
|
||||
],
|
||||
extras_require=extras,
|
||||
|
||||
Reference in New Issue
Block a user