Merge branch 'native-strike-underline'

This commit is contained in:
Tulir Asokan
2019-07-06 20:50:07 +03:00
4 changed files with 44 additions and 73 deletions
@@ -18,15 +18,15 @@ from typing import List, Tuple, Pattern
import re
from telethon.tl.types import (MessageEntityMention as Mention, MessageEntityBotCommand as Command,
MessageEntityMentionName as MentionName, MessageEntityEmail as Email,
MessageEntityUrl as URL, MessageEntityTextUrl as TextURL,
MessageEntityMentionName as MentionName, MessageEntityUrl as URL,
MessageEntityEmail as Email, MessageEntityTextUrl as TextURL,
MessageEntityBold as Bold, MessageEntityItalic as Italic,
MessageEntityCode as Code, MessageEntityPre as Pre,
TypeMessageEntity)
MessageEntityStrike as Strike, MessageEntityUnderline as Underline,
MessageEntityBlockquote as Blockquote, TypeMessageEntity)
from ... import user as u, puppet as pu, portal as po
from ...types import MatrixUserID
from ..util import html_to_unicode
from .telegram_message import TelegramMessage, Entity, offset_length_multiply
from .html_reader import HTMLNode, read_html
@@ -101,13 +101,6 @@ class MatrixParser:
children.append(child)
return TelegramMessage.join(children, "\n")
@classmethod
def blockquote_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
msg = cls.tag_aware_parse_node(node, ctx)
children = msg.trim().split("\n")
children = [child.prepend("> ") for child in children]
return TelegramMessage.join(children, "\n")
@classmethod
def header_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
children = cls.node_to_tmessages(node, ctx)
@@ -122,15 +115,14 @@ class MatrixParser:
msg.format(Bold)
elif node.tag in ("i", "em"):
msg.format(Italic)
elif node.tag in ("s", "strike", "del"):
msg.format(Strike)
elif node.tag in ("u", "ins"):
msg.format(Underline)
elif node == "blockquote":
msg.format(Blockquote)
elif node.tag == "command":
msg.format(Command)
elif node.tag in ("s", "strike", "del"):
msg.text = html_to_unicode(msg.text, "\u0336")
elif node.tag in ("u", "ins"):
msg.text = html_to_unicode(msg.text, "\u0332")
if node.tag in ("s", "strike", "del", "u", "ins"):
msg.entities = Entity.adjust(msg.entities, offset_length_multiply(2))
return msg
@@ -171,9 +163,7 @@ class MatrixParser:
@classmethod
def node_to_tmessage(cls, node: HTMLNode, ctx: RecursionContext) -> TelegramMessage:
if node.tag == "blockquote":
return cls.blockquote_to_tmessage(node, ctx)
elif node.tag == "ol":
if node.tag == "ol":
return cls.list_to_tmessage(node, ctx)
elif node.tag == "ul":
return cls.list_to_tmessage(node, ctx.enter_list())
@@ -181,7 +171,8 @@ class MatrixParser:
return cls.header_to_tmessage(node, ctx)
elif node.tag == "br":
return TelegramMessage("\n")
elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "command"):
elif node.tag in ("b", "strong", "i", "em", "s", "del", "u", "ins", "blockquote",
"command"):
return cls.basic_format_to_tmessage(node, ctx)
elif node.tag == "a":
return cls.link_to_tstring(node, ctx)
+29 -17
View File
@@ -24,7 +24,8 @@ from telethon.tl.types import (MessageEntityMention, MessageEntityMentionName, M
MessageEntityItalic, MessageEntityCode, MessageEntityPre,
MessageEntityBotCommand, MessageEntityHashtag, MessageEntityCashtag,
MessageEntityPhone, TypeMessageEntity, Message, PeerChannel,
MessageFwdHeader, PeerUser)
MessageEntityBlockquote, MessageEntityStrike, MessageFwdHeader,
MessageEntityUnderline, PeerUser)
from mautrix_appservice import MatrixRequestError
from mautrix_appservice.intent_api import IntentAPI
@@ -33,7 +34,7 @@ from .. import user as u, puppet as pu, portal as po
from ..types import TelegramID
from ..db import Message as DBMessage
from .util import (add_surrogates, remove_surrogates, trim_reply_fallback_html,
trim_reply_fallback_text, unicode_to_html)
trim_reply_fallback_text)
if TYPE_CHECKING:
from ..abstract_user import AbstractUser
@@ -194,9 +195,6 @@ async def telegram_to_matrix(evt: Message, source: "AbstractUser",
text += f"\n- {evt.post_author}"
html += f"<br/><i>- <u>{evt.post_author}</u></i>"
html = unicode_to_html(text, html, "\u0336", "del")
html = unicode_to_html(text, html, "\u0332", "u")
if html:
html = html.replace("\n", "<br/>")
@@ -214,29 +212,43 @@ def _telegram_entities_to_matrix_catch(text: str, entities: List[TypeMessageEnti
return "[failed conversion in _telegram_entities_to_matrix]"
def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -> str:
def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity],
offset: int = 0, length: int = None) -> str:
if not entities:
return text
return escape(text)
if length is None:
length = len(text)
html = []
last_offset = 0
for entity in entities:
if entity.offset > last_offset:
html.append(escape(text[last_offset:entity.offset]))
elif entity.offset < last_offset:
for i, entity in enumerate(entities):
if entity.offset > offset + length:
break
relative_offset = entity.offset - offset
if relative_offset > last_offset:
html.append(escape(text[last_offset:relative_offset]))
elif relative_offset < last_offset:
continue
skip_entity = False
entity_text = escape(text[entity.offset:entity.offset + entity.length])
entity_text = _telegram_entities_to_matrix(
text=text[relative_offset:relative_offset + entity.length],
entities=entities[i + 1:], offset=entity.offset, length=entity.length)
entity_type = type(entity)
if entity_type == MessageEntityBold:
html.append(f"<strong>{entity_text}</strong>")
elif entity_type == MessageEntityItalic:
html.append(f"<em>{entity_text}</em>")
elif entity_type == MessageEntityUnderline:
html.append(f"<u>{entity_text}</u>")
elif entity_type == MessageEntityStrike:
html.append(f"<del>{entity_text}</del>")
elif entity_type == MessageEntityBlockquote:
html.append(f"<blockquote>{entity_text}</blockquote>")
elif entity_type == MessageEntityCode:
html.append(("<pre><code>{entity_text}</code></pre>"
if "\n" in entity_text
else "<code>{entity_text}</code>").format(entity_text=entity_text))
html.append(f"<pre><code>{entity_text}</code></pre>"
if "\n" in entity_text
else f"<code>{entity_text}</code>")
elif entity_type == MessageEntityPre:
skip_entity = _parse_pre(html, entity_text, entity.language)
elif entity_type == MessageEntityMention:
@@ -254,8 +266,8 @@ def _telegram_entities_to_matrix(text: str, entities: List[TypeMessageEntity]) -
html.append(f"<font color='blue'>{entity_text}</font>")
else:
skip_entity = True
last_offset = entity.offset + (0 if skip_entity else entity.length)
html.append(text[last_offset:])
last_offset = relative_offset + (0 if skip_entity else entity.length)
html.append(escape(text[last_offset:]))
return "".join(html)
-32
View File
@@ -20,38 +20,6 @@ import struct
import re
def unicode_to_html(text: str, html: str, ctrl: str, tag: str) -> str:
if ctrl not in text:
return html
if not html:
html = escape(text)
tag_start = f"<{tag}>"
tag_end = f"</{tag}>"
characters = html.split(ctrl)
html = ""
in_tag = False
for char in characters:
if not in_tag:
if len(char) > 1:
html += char[0:-1]
char = char[-1]
html += tag_start
in_tag = True
html += char
else:
if len(char) > 1:
html += tag_end
in_tag = False
html += char
if in_tag:
html += tag_end
return html
def html_to_unicode(text: str, ctrl: str) -> str:
return ctrl.join(text) + ctrl
# add_surrogates and remove_surrogates are unicode surrogate utility functions from Telethon.
# Licensed under the MIT license.
# https://github.com/LonamiWebs/Telethon/blob/7cce7aa3e4c6c7019a55530391b1761d33e5a04e/telethon/helpers.py
+2 -2
View File
@@ -6,7 +6,7 @@ extras = {
"fast_crypto": ["cryptg>=0.1,<0.3"],
"webp_convert": ["Pillow>=4.3.0,<7"],
"hq_thumbnails": ["moviepy>=1.0,<2.0"],
"metrics": ["prometheus-client>=0.6.0,<0.7.0"],
"metrics": ["prometheus-client>=0.6.0,<0.8.0"],
}
extras["all"] = list({dep for deps in extras.values() for dep in deps})
@@ -38,7 +38,7 @@ setuptools.setup(
"ruamel.yaml>=0.15.35,<0.16",
"future-fstrings>=0.4.2",
"python-magic>=0.4.15,<0.5",
"telethon>=1.7,<1.9",
"telethon>=1.9,<1.10",
"telethon-session-sqlalchemy>=0.2.14,<0.3",
],
extras_require=extras,