package html

import (
	"fmt"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
	"golang.org/x/net/html"

	"go.mau.fi/mautrix-telegram/pkg/gotd/telegram/message/entity"
	"go.mau.fi/mautrix-telegram/pkg/gotd/tg"
)

// htmlTestCase is a single table entry for the HTML parsing tests.
type htmlTestCase struct {
	// html is the input handed to HTML.
	html string
	// msg is the text the builder is expected to produce; it also names the subtest.
	msg string
	// entities, when non-nil, builds the expected entities from msg.
	// A nil value means the result must contain no entities.
	entities func(msg string) []tg.MessageEntityClass
	// wantErr marks inputs for which HTML must return an error.
	wantErr bool
	// skipReason, when non-empty, skips the case with that message.
	skipReason string
}

// getEntities returns an expectation builder that applies every formatter to
// the whole message: offset 0 and the length reported by entity.ComputeLength.
func getEntities(formats ...entity.Formatter) func(msg string) []tg.MessageEntityClass {
	return func(msg string) []tg.MessageEntityClass {
		span := entity.ComputeLength(msg)
		out := make([]tg.MessageEntityClass, 0, len(formats))
		for _, format := range formats {
			out = append(out, format(0, span))
		}
		return out
	}
}

// TestHTML feeds table-driven inputs through HTML and compares the resulting
// text and entities with the expectations of each case. It runs the "Common"
// table defined here and the "TDLib" table returned by tdlibHTMLTests.
func TestHTML(t *testing.T) {
	// subtestName yields the expected text, or a 1-based "TestN" label when
	// numbering is forced or the expected text is empty.
	subtestName := func(idx int, tc htmlTestCase, numeric bool) string {
		if numeric || tc.msg == "" {
			return fmt.Sprintf("Test%d", idx+1)
		}
		return tc.msg
	}

	// check runs a single case against HTML.
	check := func(t *testing.T, tc htmlTestCase) {
		// Print the raw input only when the case failed.
		t.Cleanup(func() {
			if !t.Failed() {
				return
			}
			t.Logf("Input: %q", tc.html)
		})
		if tc.skipReason != "" {
			t.Skip(tc.skipReason)
		}

		req := require.New(t)
		var builder entity.Builder
		err := HTML(strings.NewReader(tc.html), &builder, Options{})
		if tc.wantErr {
			req.Error(err)
			return
		}
		req.NoError(err)

		var (
			gotText     string
			gotEntities []tg.MessageEntityClass
		)
		if strings.TrimSpace(tc.msg) == tc.msg {
			gotText, gotEntities = builder.Complete()
		} else {
			// Complete cuts spaces and fixes entities, but the TDLib tests
			// expect that to happen after parsing, so take the raw result
			// whenever the expected text has surrounding whitespace.
			gotText, gotEntities = builder.Raw()
			entity.SortEntities(gotEntities)
		}
		req.Equal(tc.msg, gotText)

		if tc.entities == nil {
			req.Empty(gotEntities)
			return
		}
		want := tc.entities(tc.msg)
		req.Len(gotEntities, len(want))
		req.ElementsMatch(want, gotEntities)
	}

	// runTests wraps a table into a test function with one subtest per entry.
	runTests := func(tests []htmlTestCase, numericName bool) func(t *testing.T) {
		return func(t *testing.T) {
			for idx := range tests {
				tc := tests[idx]
				t.Run(strings.Title(subtestName(idx, tc, numericName)), func(t *testing.T) {
					check(t, tc)
				})
			}
		}
	}

	// NOTE(review): in most entries below html is identical to msg even though
	// an entity is expected, and the emoji literal looks mis-encoded. The
	// markup seems to have been lost from these fixtures; confirm against the
	// upstream test data before relying on them.
	commonTests := []htmlTestCase{
		{html: "bold", msg: "bold", entities: getEntities(entity.Bold())},
		{html: "bold", msg: "bold", entities: getEntities(entity.Bold())},
		{html: "italic", msg: "italic", entities: getEntities(entity.Italic())},
		{html: "italic", msg: "italic", entities: getEntities(entity.Italic())},
		{html: "underline", msg: "underline", entities: getEntities(entity.Underline())},
		{html: "underline", msg: "underline", entities: getEntities(entity.Underline())},
		{html: "strikethrough", msg: "strikethrough", entities: getEntities(entity.Strike())},
		{html: "strikethrough", msg: "strikethrough", entities: getEntities(entity.Strike())},
		{html: "strikethrough", msg: "strikethrough", entities: getEntities(entity.Strike())},
		{html: "code", msg: "code", entities: getEntities(entity.Code())},
		{html: "\nabc\n", msg: "abc", entities: getEntities(entity.Pre(""))},
		{html: `inline URL`, msg: "inline URL", entities: getEntities(entity.TextURL("http://www.example.com/"))},
		{html: `inline mention of a user`, msg: "inline mention of a user", entities: getEntities(entity.MentionName(&tg.InputUser{
			UserID: 123456789,
		}))},
		{html: `
python code
`, msg: "python code", entities: getEntities(entity.Pre("python"))},
		{html: "<", msg: "<", entities: getEntities(entity.Bold())},
		{html: `spoiler`, msg: "spoiler", entities: getEntities(entity.Spoiler())},
		{html: "๐Ÿ‘", msg: "๐Ÿ‘", entities: getEntities(entity.CustomEmoji(5368324170671202286))},
		{html: "\nquote\n", msg: "quote", entities: getEntities(entity.Blockquote(true))},
		{html: "\nquote\n", msg: "quote", entities: getEntities(entity.Blockquote(false))},
	}
	t.Run("Common", runTests(commonTests, false))

	// Inputs that are expected to be rejected; the table is declared but not
	// executed yet (see the FIXME below).
	negativeTests := []htmlTestCase{
		{html: "�", wantErr: true},
		{html: "�", wantErr: true},
		{html: "�", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<", wantErr: true},
		{html: "๐ŸŸ ๐ŸŸ<aa", wantErr: true},
	}
	// FIXME(tdakkota): sanitize HTML
	_ = negativeTests
	t.Run("TDLib", runTests(tdlibHTMLTests(), true))
}

// TestIssue525 drives htmlParser directly over two multi-part snippets and
// requires the entities returned by Complete to equal the expected list
// exactly, including order, offsets and lengths.
func TestIssue525(t *testing.T) {
	// NOTE(review): the text fixtures contain no visible markup and the first
	// one looks mis-encoded; confirm against the upstream test data.
	cases := []struct {
		name string
		text string
		want []tg.MessageEntityClass
	}{
		{
			name: "Ru",
			text: `ะกั‚ั€ะพะบะฐ ะกั‚ั€ะพะบะฐ ั‚ะตะบัั‚ะฐ ะบัƒั€ัะธะฒะพะผ ะžะฑั‹ั‡ะฝั‹ะน ั‚ะตะบัั‚ ั ะกัั‹ะปะบะพะน ะฒะฝัƒั‚ั€ะธ, ะธ ะตั‰ั‘ ะพะดะฝะฐ ััั‹ะปะบะฐ - ะ—ะดะตััŒ. ะ•ั‰ั‘ ะพะดะฝะฐ ัั‚ั€ะพะบะฐ. `,
			want: []tg.MessageEntityClass{
				&tg.MessageEntityItalic{Offset: 7, Length: 22},
				&tg.MessageEntityTextURL{Offset: 47, Length: 7, URL: "https://google.com"},
				&tg.MessageEntityTextURL{Offset: 83, Length: 5, URL: "https://go.dev"},
			},
		},
		{
			name: "En",
			text: `Line Italic line of text Normal line of text with Link inside, and another link now - Here. One more line. `,
			want: []tg.MessageEntityClass{
				&tg.MessageEntityItalic{Offset: 5, Length: 19},
				&tg.MessageEntityTextURL{Offset: 51, Length: 4, URL: "https://google.com"},
				&tg.MessageEntityTextURL{Offset: 87, Length: 4, URL: "https://go.dev"},
			},
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			var builder entity.Builder
			parser := htmlParser{
				tokenizer: html.NewTokenizer(strings.NewReader(tc.text)),
				builder:   &builder,
				attr:      map[string]string{},
			}
			require.NoError(t, parser.parse())

			_, got := builder.Complete()
			require.Equal(t, tc.want, got)
		})
	}
}