import re
import struct
from html import escape
from typing import Optional


# Unicode surrogate handling from
# https://github.com/LonamiWebs/Telethon/blob/master/telethon/extensions/markdown.py
def add_surrogates(text: Optional[str]) -> Optional[str]:
    """Expand every non-BMP character in *text* into a UTF-16 surrogate pair.

    Characters in the range U+10000..U+10FFFF are replaced by the two
    surrogate code units of their UTF-16 encoding, so that string offsets
    count UTF-16 code units. All other characters are left untouched.

    Returns ``None`` when *text* is ``None``.
    """
    if text is None:
        return None
    return "".join(
        # "<HH" reads the two little-endian 16-bit code units of the pair.
        "".join(chr(unit) for unit in struct.unpack("<HH", char.encode("utf-16-le")))
        if 0x10000 <= ord(char) <= 0x10FFFF
        else char
        for char in text
    )


# NOTE(review): the source between add_surrogates and the quoted-reply trimmer
# was lost; this inverse helper is a reconstruction -- confirm its name and
# presence against the callers before relying on it.
def remove_surrogates(text: Optional[str]) -> Optional[str]:
    """Collapse UTF-16 surrogate pairs in *text* back into single characters.

    Returns ``None`` when *text* is ``None``.
    """
    if text is None:
        return None
    return text.encode("utf-16", "surrogatepass").decode("utf-16")


# NOTE(review): the original ``def`` line was lost; the name mirrors
# trim_reply_fallback_html -- confirm against the callers.
def trim_reply_fallback_text(text: str) -> str:
    """Drop the leading run of ``"> "``-quoted lines from *text*.

    *text* is returned unchanged when it does not start with ``"> "`` or
    contains no newline. Only lines that start with ``"> "`` are removed, so
    a blank separator line following the quote is kept in the result.
    """
    if not text.startswith("> ") or "\n" not in text:
        return text
    lines = text.split("\n")
    # Index of the first line that is not part of the quote (or the end).
    first_kept = next(
        (index for index, line in enumerate(lines) if not line.startswith("> ")),
        len(lines),
    )
    return "\n".join(lines[first_kept:])


# Matches a reply fallback wrapper only at the very start of the HTML body.
HTML_REPLY_FALLBACK_REGEX = re.compile(r"^<mx-reply>[\s\S]+?</mx-reply>")


def trim_reply_fallback_html(html: str) -> str:
    """Remove a leading ``<mx-reply>...</mx-reply>`` element from *html*."""
    return HTML_REPLY_FALLBACK_REGEX.sub("", html)


def unicode_to_html(text: str, html: Optional[str], ctrl: str, tag: str) -> Optional[str]:
    """Replace *ctrl*-marked characters with ``<tag>...</tag>`` markup.

    Every character that is immediately followed by *ctrl* is considered
    marked. Consecutive marked characters are wrapped in a single
    ``<tag>`` element and the *ctrl* markers themselves are removed.

    Args:
        text: Plain text used to decide whether any conversion is needed.
        html: HTML to convert; when falsy, ``escape(text)`` is used instead.
        ctrl: The marker string that follows each marked character.
        tag: Name of the HTML element to wrap marked characters in.

    Returns:
        *html* unchanged when *ctrl* does not occur in *text*, otherwise the
        converted HTML string.
    """
    if ctrl not in text:
        return html
    if not html:
        html = escape(text)
    tag_start = f"<{tag}>"
    tag_end = f"</{tag}>"

    # Every chunk except the last one ended with a marker, so its final
    # character is marked; anything before that character is unmarked.
    # NOTE(review): only the single character before the marker is wrapped,
    # so a marker directly after an escaped entity or markup splits it.
    *marked_chunks, tail = html.split(ctrl)
    parts = []
    in_tag = False
    for chunk in marked_chunks:
        plain, marked = chunk[:-1], chunk[-1:]
        if plain:
            # Unmarked text interrupts the current run of marked characters.
            if in_tag:
                parts.append(tag_end)
                in_tag = False
            parts.append(plain)
        if not in_tag:
            parts.append(tag_start)
            in_tag = True
        parts.append(marked)
    if in_tag:
        parts.append(tag_end)
    # The final chunk was not followed by a marker, so it stays outside the tag.
    parts.append(tail)
    return "".join(parts)


def html_to_unicode(text: str, ctrl: str) -> str:
    """Append *ctrl* after every character of *text*.

    Note that *ctrl* is appended unconditionally, so an empty *text* yields
    *ctrl* on its own.
    """
    return ctrl.join(text) + ctrl