diff --git a/mautrix_telegram/formatter/from_matrix/html_reader.py b/mautrix_telegram/formatter/from_matrix/html_reader.py index a1fbe4bf..d707537c 100644 --- a/mautrix_telegram/formatter/from_matrix/html_reader.py +++ b/mautrix_telegram/formatter/from_matrix/html_reader.py @@ -29,6 +29,10 @@ class HTMLNode(list): class NodeifyingParser(HTMLParser): + # From https://www.w3.org/TR/html5/syntax.html#writing-html-documents-elements + void_tags = ("area", "base", "br", "col", "command", "embed", "hr", "img", "input", "link", + "meta", "param", "source", "track", "wbr") + def __init__(self): super().__init__() self.stack = [HTMLNode("html", [])] # type: List[HTMLNode] @@ -36,7 +40,11 @@ class NodeifyingParser(HTMLParser): def handle_starttag(self, tag, attrs): node = HTMLNode(tag, attrs) self.stack[-1].append(node) - self.stack.append(node) + if tag not in self.void_tags: + self.stack.append(node) + + def handle_startendtag(self, tag, attrs): + self.stack[-1].append(HTMLNode(tag, attrs)) def handle_endtag(self, tag): if tag == self.stack[-1].tag: