diff --git a/mautrix_telegram/formatter.py b/mautrix_telegram/formatter.py
index 8b0d7577..c233201b 100644
--- a/mautrix_telegram/formatter.py
+++ b/mautrix_telegram/formatter.py
@@ -29,6 +29,20 @@ from .db import Message as DBMessage
 
 log = logging.getLogger("mau.formatter")
 
+# TEXT LEN EXPLANATION:
+# Telegram formatting counts each two-byte UTF-16 code unit of the text as one character.
+#
+# For Telegram -> Matrix formatting, we get the same counting mechanism by encoding the input
+# text as UTF-16 Little Endian and doubling all the offsets and lengths given by Telegram. With
+# those doubled values, we process the input entities and text. The text is converted back to
+# native str format before it's inserted into the output HTML.
+#
+# For Matrix -> Telegram formatting, do the same input encoding, but divide the length by two
+# instead of multiplying when generating the lengths and offsets of Telegram entities.
+#
+# The endianness doesn't matter, but it has to be specified to avoid the two BOM bytes messing
+# everything up.
+TEMP_ENC = "utf-16-le"
 
 # region Matrix to Telegram
 
@@ -124,7 +138,9 @@ class MatrixParser(HTMLParser):
             self._open_tags_meta.appendleft(url)
 
         if entity_type and tag not in self._building_entities:
-            self._building_entities[tag] = entity_type(offset=len(self.text), length=0, **args)
+            # See "TEXT LEN EXPLANATION" near start of file
+            offset = int(len(self.text.encode(TEMP_ENC)) / 2)
+            self._building_entities[tag] = entity_type(offset=offset, length=0, **args)
 
     def _list_depth(self):
         depth = 0
@@ -159,7 +175,8 @@ class MatrixParser(HTMLParser):
                 text = f"{indent}{n}. {text}"
                 list_format_offset = len(indent) + 3
         for tag, entity in self._building_entities.items():
-            entity.length += len(text.strip("\n"))
+            # See "TEXT LEN EXPLANATION" near start of file
+            entity.length += int(len(text.strip("\n").encode(TEMP_ENC)) / 2)
             entity.offset += list_format_offset
 
         if text.endswith("\n"):
@@ -269,16 +286,20 @@ def telegram_to_matrix(text, entities):
 def _telegram_to_matrix(text, entities):
     if not entities:
         return text
+    # See "TEXT LEN EXPLANATION" near start of file
+    text = text.encode(TEMP_ENC)
     html = []
     last_offset = 0
     for entity in entities:
+        entity.offset *= 2
+        entity.length *= 2
         if entity.offset > last_offset:
-            html.append(escape(text[last_offset:entity.offset]))
+            html.append(escape(text[last_offset:entity.offset].decode(TEMP_ENC)))
         elif entity.offset < last_offset:
             continue
 
         skip_entity = False
-        entity_text = escape(text[entity.offset:entity.offset + entity.length])
+        entity_text = escape(text[entity.offset:entity.offset + entity.length].decode(TEMP_ENC))
         entity_type = type(entity)
 
         if entity_type == MessageEntityBold:
@@ -331,7 +352,7 @@ def _telegram_to_matrix(text, entities):
         else:
             skip_entity = True
         last_offset = entity.offset + (0 if skip_entity else entity.length)
-    html.append(text[last_offset:])
+    html.append(text[last_offset:].decode(TEMP_ENC))
 
     return "".join(html)