From 430f7b72176cbf468bce40f211ab40692e3a2be1 Mon Sep 17 00:00:00 2001
From: Tulir Asokan
Date: Sat, 11 May 2019 19:04:29 +0300
Subject: [PATCH] Handle void tags correctly in the HTML parser. Fixes #309

---
 mautrix_telegram/formatter/from_matrix/html_reader.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mautrix_telegram/formatter/from_matrix/html_reader.py b/mautrix_telegram/formatter/from_matrix/html_reader.py
index a1fbe4bf..d707537c 100644
--- a/mautrix_telegram/formatter/from_matrix/html_reader.py
+++ b/mautrix_telegram/formatter/from_matrix/html_reader.py
@@ -29,6 +29,10 @@ class HTMLNode(list):
 
 
 class NodeifyingParser(HTMLParser):
+    # From https://www.w3.org/TR/html5/syntax.html#writing-html-documents-elements
+    void_tags = ("area", "base", "br", "col", "command", "embed", "hr", "img", "input", "link",
+                 "meta", "param", "source", "track", "wbr")
+
     def __init__(self):
         super().__init__()
         self.stack = [HTMLNode("html", [])]  # type: List[HTMLNode]
@@ -36,7 +40,11 @@ class NodeifyingParser(HTMLParser):
     def handle_starttag(self, tag, attrs):
         node = HTMLNode(tag, attrs)
         self.stack[-1].append(node)
-        self.stack.append(node)
+        if tag not in self.void_tags:
+            self.stack.append(node)
+
+    def handle_startendtag(self, tag, attrs):
+        self.stack[-1].append(HTMLNode(tag, attrs))
 
     def handle_endtag(self, tag):
         if tag == self.stack[-1].tag: