7a04f298d2
Update to the latest Telegram layer. Remove some references to fields in tg.Entities that don't exist in the schema. Originally added here: https://github.com/beeper/td/commit/820929062a2ba0104397bc01235ab58a9cff780e. Referenced here: https://github.com/mautrix/telegramgo/commit/124f0967ed195b5a380c9bd02e170ada9710dde3 and https://github.com/mautrix/telegramgo/commit/4205047aab2e0639217148b5d125bfaab668bd8e
212 lines
6.5 KiB
Go
212 lines
6.5 KiB
Go
package html
|
||
|
||
import (
|
||
"fmt"
|
||
"strings"
|
||
"testing"
|
||
|
||
"github.com/stretchr/testify/require"
|
||
"golang.org/x/net/html"
|
||
|
||
"go.mau.fi/mautrix-telegram/pkg/gotd/telegram/message/entity"
|
||
"go.mau.fi/mautrix-telegram/pkg/gotd/tg"
|
||
)
|
||
|
||
type htmlTestCase struct {
|
||
html string
|
||
msg string
|
||
entities func(msg string) []tg.MessageEntityClass
|
||
wantErr bool
|
||
skipReason string
|
||
}
|
||
|
||
func getEntities(formats ...entity.Formatter) func(msg string) []tg.MessageEntityClass {
|
||
return func(msg string) []tg.MessageEntityClass {
|
||
length := entity.ComputeLength(msg)
|
||
r := make([]tg.MessageEntityClass, len(formats))
|
||
for i := range formats {
|
||
r[i] = formats[i](0, length)
|
||
}
|
||
return r
|
||
}
|
||
}
|
||
|
||
func TestHTML(t *testing.T) {
|
||
runTests := func(tests []htmlTestCase, numericName bool) func(t *testing.T) {
|
||
return func(t *testing.T) {
|
||
for i, test := range tests {
|
||
testName := test.msg
|
||
if numericName || testName == "" {
|
||
testName = fmt.Sprintf("Test%d", i+1)
|
||
}
|
||
t.Run(strings.Title(testName), func(t *testing.T) {
|
||
t.Cleanup(func() {
|
||
if t.Failed() {
|
||
t.Logf("Input: %q", test.html)
|
||
}
|
||
})
|
||
if test.skipReason != "" {
|
||
t.Skip(test.skipReason)
|
||
}
|
||
a := require.New(t)
|
||
b := entity.Builder{}
|
||
|
||
err := HTML(strings.NewReader(test.html), &b, Options{})
|
||
if test.wantErr {
|
||
a.Error(err)
|
||
return
|
||
}
|
||
a.NoError(err)
|
||
|
||
var (
|
||
msg string
|
||
entities []tg.MessageEntityClass
|
||
)
|
||
if strings.TrimSpace(test.msg) != test.msg {
|
||
// Complete cuts spaces and fixes entities, but TDLib test expects
|
||
// that it happens after parsing.
|
||
msg, entities = b.Raw()
|
||
entity.SortEntities(entities)
|
||
} else {
|
||
msg, entities = b.Complete()
|
||
}
|
||
|
||
a.Equal(test.msg, msg)
|
||
if test.entities != nil {
|
||
expect := test.entities(test.msg)
|
||
a.Len(entities, len(expect))
|
||
a.ElementsMatch(expect, entities)
|
||
} else {
|
||
a.Empty(entities)
|
||
}
|
||
})
|
||
}
|
||
}
|
||
}
|
||
|
||
{
|
||
tests := []htmlTestCase{
|
||
{html: "<b>bold</b>", msg: "bold", entities: getEntities(entity.Bold())},
|
||
{html: "<strong>bold</strong>", msg: "bold", entities: getEntities(entity.Bold())},
|
||
{html: "<i>italic</i>", msg: "italic", entities: getEntities(entity.Italic())},
|
||
{html: "<em>italic</em>", msg: "italic", entities: getEntities(entity.Italic())},
|
||
{html: "<u>underline</u>", msg: "underline", entities: getEntities(entity.Underline())},
|
||
{html: "<ins>underline</ins>", msg: "underline", entities: getEntities(entity.Underline())},
|
||
{html: "<s>strikethrough</s>", msg: "strikethrough", entities: getEntities(entity.Strike())},
|
||
{html: "<strike>strikethrough</strike>", msg: "strikethrough", entities: getEntities(entity.Strike())},
|
||
{html: "<del>strikethrough</del>", msg: "strikethrough", entities: getEntities(entity.Strike())},
|
||
{html: "<code>code</code>", msg: "code", entities: getEntities(entity.Code())},
|
||
{html: "<pre>abc</pre>", msg: "abc", entities: getEntities(entity.Pre(""))},
|
||
{html: `<a href="http://www.example.com/">inline URL</a>`, msg: "inline URL",
|
||
entities: getEntities(entity.TextURL("http://www.example.com/"))},
|
||
{html: `<a href="tg://user?id=123456789">inline mention of a user</a>`, msg: "inline mention of a user",
|
||
entities: getEntities(entity.MentionName(&tg.InputUser{
|
||
UserID: 123456789,
|
||
}))},
|
||
{html: `<pre><code class="language-python">python code</code></pre>`, msg: "python code",
|
||
entities: getEntities(entity.Pre("python"))},
|
||
{html: "<b><</b>", msg: "<", entities: getEntities(entity.Bold())},
|
||
{html: `<span class="tg-spoiler">spoiler</span>`, msg: "spoiler", entities: getEntities(entity.Spoiler())},
|
||
{html: "<tg-emoji emoji-id=\"5368324170671202286\">👍</tg-emoji>", msg: "👍", entities: getEntities(entity.CustomEmoji(5368324170671202286))},
|
||
{html: "<blockquote expandable>quote</blockquote>", msg: "quote", entities: getEntities(entity.Blockquote(true))},
|
||
{html: "<blockquote>quote</blockquote>", msg: "quote", entities: getEntities(entity.Blockquote(false))},
|
||
}
|
||
t.Run("Common", runTests(tests, false))
|
||
}
|
||
|
||
{
|
||
negativeTests := []htmlTestCase{
|
||
{html: "�", wantErr: true},
|
||
{html: "�", wantErr: true},
|
||
{html: "�", wantErr: true},
|
||
{html: "🏟 🏟<<abacaba", wantErr: true},
|
||
{html: "🏟 🏟<<abac aba>", wantErr: true},
|
||
{html: "🏟 🏟<<abac>", wantErr: true},
|
||
{html: "🏟 🏟<<i =aba>", wantErr: true},
|
||
{html: "🏟 🏟<<i aba>", wantErr: true},
|
||
{html: "🏟 🏟<<i aba = ", wantErr: true},
|
||
{html: "🏟 🏟<<i aba = 190azAz-.,", wantErr: true},
|
||
{html: "🏟 🏟<<i aba = \"<>">", wantErr: true},
|
||
{html: "🏟 🏟<<i aba = \\'<>">", wantErr: true},
|
||
{html: "🏟 🏟<</", wantErr: true},
|
||
{html: "🏟 🏟<<b></b></", wantErr: true},
|
||
{html: "🏟 🏟<<i>a</i ", wantErr: true},
|
||
{html: "🏟 🏟<<i>a</em >", wantErr: true},
|
||
}
|
||
// FIXME(tdakkota): sanitize HTML
|
||
_ = negativeTests
|
||
|
||
t.Run("TDLib", runTests(tdlibHTMLTests(), true))
|
||
}
|
||
}
|
||
|
||
func TestIssue525(t *testing.T) {
|
||
test := func(text string, expected []tg.MessageEntityClass) func(t *testing.T) {
|
||
return func(t *testing.T) {
|
||
a := require.New(t)
|
||
|
||
b := entity.Builder{}
|
||
p := htmlParser{
|
||
tokenizer: html.NewTokenizer(strings.NewReader(text)),
|
||
builder: &b,
|
||
attr: map[string]string{},
|
||
}
|
||
|
||
a.NoError(p.parse())
|
||
_, entities := b.Complete()
|
||
a.Equal(expected, entities)
|
||
}
|
||
}
|
||
|
||
t.Run("Ru", test(`Строка
|
||
<i>Строка текста курсивом</i>
|
||
|
||
Обычный текст с <a href="https://google.com">Ссылкой</a> внутри, и
|
||
ещё одна ссылка - <a href="https://go.dev">Здесь</a>.
|
||
|
||
Ещё одна строка.
|
||
`,
|
||
[]tg.MessageEntityClass{
|
||
&tg.MessageEntityItalic{
|
||
Offset: 7,
|
||
Length: 22,
|
||
},
|
||
&tg.MessageEntityTextURL{
|
||
Offset: 47,
|
||
Length: 7,
|
||
URL: "https://google.com",
|
||
},
|
||
&tg.MessageEntityTextURL{
|
||
Offset: 83,
|
||
Length: 5,
|
||
URL: "https://go.dev",
|
||
},
|
||
}),
|
||
)
|
||
t.Run("En", test(`Line
|
||
<i>Italic line of text</i>
|
||
|
||
Normal line of text with <a href="https://google.com">Link</a> inside, and
|
||
another link now - <a href="https://go.dev">Here</a>.
|
||
|
||
One more line.
|
||
`,
|
||
[]tg.MessageEntityClass{
|
||
&tg.MessageEntityItalic{
|
||
Offset: 5,
|
||
Length: 19,
|
||
},
|
||
&tg.MessageEntityTextURL{
|
||
Offset: 51,
|
||
Length: 4,
|
||
URL: "https://google.com",
|
||
},
|
||
&tg.MessageEntityTextURL{
|
||
Offset: 87,
|
||
Length: 4,
|
||
URL: "https://go.dev",
|
||
},
|
||
}),
|
||
)
|
||
}
|