From 882582456ee6514a0a9be79fc8bb28407c4dcf31 Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Mon, 29 Jul 2024 14:55:11 -0600 Subject: [PATCH] telegramfmt: text formatting TG -> Matrix Signed-off-by: Sumner Evans --- go.mod | 2 +- pkg/connector/client.go | 64 +++++++++ pkg/connector/telegram.go | 78 ++++++----- pkg/connector/telegramfmt/convert.go | 150 ++++++++++++++++++++++ pkg/connector/telegramfmt/convert_test.go | 84 ++++++++++++ pkg/connector/telegramfmt/html.go | 115 +++++++++++++++++ pkg/connector/telegramfmt/tags.go | 139 ++++++++++++++++++++ pkg/connector/telegramfmt/tree.go | 113 ++++++++++++++++ pkg/connector/tomatrix.go | 62 ++++++--- 9 files changed, 749 insertions(+), 58 deletions(-) create mode 100644 pkg/connector/telegramfmt/convert.go create mode 100644 pkg/connector/telegramfmt/convert_test.go create mode 100644 pkg/connector/telegramfmt/html.go create mode 100644 pkg/connector/telegramfmt/tags.go create mode 100644 pkg/connector/telegramfmt/tree.go diff --git a/go.mod b/go.mod index 119bea26..1bdc3e35 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( go.mau.fi/util v0.6.0 go.mau.fi/zerozap v0.1.1 go.uber.org/zap v1.27.0 + golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 maunium.net/go/mautrix v0.19.1-0.20240719130542-cc5f225bc61c ) @@ -43,7 +44,6 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.25.0 // indirect - golang.org/x/exp v0.0.0-20240716175740-e3f259677ff7 // indirect golang.org/x/net v0.27.0 // indirect golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.22.0 // indirect diff --git a/pkg/connector/client.go b/pkg/connector/client.go index 8b6793a5..f21f481a 100644 --- a/pkg/connector/client.go +++ b/pkg/connector/client.go @@ -4,6 +4,8 @@ import ( "context" "errors" "fmt" + "regexp" + "strconv" "strings" "sync" @@ -20,6 +22,7 @@ import ( "go.mau.fi/mautrix-telegram/pkg/connector/ids" "go.mau.fi/mautrix-telegram/pkg/connector/media" "go.mau.fi/mautrix-telegram/pkg/connector/store" + "go.mau.fi/mautrix-telegram/pkg/connector/telegramfmt" "go.mau.fi/mautrix-telegram/pkg/connector/util" ) @@ -37,6 +40,8 @@ type TelegramClient struct { appConfig map[string]any appConfigHash int + + telegramFmtParams *telegramfmt.FormatParams } var ( @@ -78,6 +83,8 @@ func (u UpdateDispatcher) Handle(ctx context.Context, updates tg.UpdatesClass) e return u.UpdateDispatcher.Handle(ctx, updates) } +var messageLinkRegex = regexp.MustCompile(`^https?:\/\/t(?:elegram)?\.(?:me|dog)\/([A-Za-z][A-Za-z0-9_]{3,31}[A-Za-z0-9]|[Cc]\/[0-9]{1,20})\/([0-9]{1,20})$`) + func NewTelegramClient(ctx context.Context, tc *TelegramConnector, login *bridgev2.UserLogin) (*TelegramClient, error) { telegramUserID, err := ids.ParseUserLoginID(login.ID) if err != nil { @@ -141,6 +148,63 @@ func NewTelegramClient(ctx context.Context, tc *TelegramConnector, login *bridge }) client.clientCancel, err = connectTelegramClient(ctx, client.client) client.reactionMessageLocks = map[int]*sync.Mutex{} + + client.telegramFmtParams = &telegramfmt.FormatParams{ + GetUserInfo: func(ctx context.Context, id networkid.UserID) (telegramfmt.UserInfo, error) { + ghost, err := tc.Bridge.GetGhostByID(ctx, id) + if err != nil { + return telegramfmt.UserInfo{}, err + } + userInfo := telegramfmt.UserInfo{MXID: ghost.Intent.GetMXID(), Name: ghost.Name} + if id == client.userID { + userInfo.MXID = client.userLogin.UserMXID + } + return userInfo, nil + }, + NormalizeURL: func(ctx context.Context, url string) string { + log := zerolog.Ctx(ctx).With(). + Str("conversion_direction", "to_matrix"). + Str("entity_type", "url"). + Logger() + + if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "ftp://") && !strings.HasPrefix(url, "magnet://") { + url = "http://" + url + } + + submatches := messageLinkRegex.FindStringSubmatch(url) + if len(submatches) == 0 { + return url + } + group := submatches[1] + msgID, err := strconv.Atoi(submatches[2]) + if err != nil { + log.Err(err).Msg("error parsing message ID") + return url + } + + var portalKey networkid.PortalKey + if strings.HasPrefix(group, "C/") || strings.HasPrefix(group, "c/") { + portalKey = networkid.PortalKey{ID: networkid.PortalID(fmt.Sprintf("%s:%s", ids.PeerTypeChannel, group[2:]))} + } else { + portalKey = networkid.PortalKey{ID: networkid.PortalID(fmt.Sprintf("%s:%s", ids.PeerTypeUser, group))} + } + + portal, err := tc.Bridge.DB.Portal.GetByKey(ctx, portalKey) + if err != nil { + log.Err(err).Msg("error getting portal") + return url + } + + message, err := tc.Bridge.DB.Message.GetFirstPartByID(ctx, client.loginID, ids.MakeMessageID(msgID)) + if err != nil { + log.Err(err).Msg("error getting message") + return url + } + + return fmt.Sprintf("https://matrix.to/#/%s/%s", portal.MXID, message.MXID) + }, + } + go func() { err = updatesManager.Run(ctx, client.client.API(), telegramUserID, updates.AuthOptions{}) if err != nil { diff --git a/pkg/connector/telegram.go b/pkg/connector/telegram.go index 79427df0..19fcf20a 100644 --- a/pkg/connector/telegram.go +++ b/pkg/connector/telegram.go @@ -225,39 +225,33 @@ func (t *TelegramClient) onMessageEdit(ctx context.Context, update IGetMessage) Str("conversion_direction", "to_matrix"). Int("message_id", msg.ID) }, - ID: ids.MakeMessageID(msg.ID), - Sender: sender, - PortalKey: ids.MakePortalKey(msg.PeerID), - TargetMessage: ids.MakeMessageID(msg.ID), - Data: msg, - ConvertEditFunc: t.convertEdit, - Timestamp: time.Unix(int64(msg.EditDate), 0), + ID: ids.MakeMessageID(msg.ID), + Sender: sender, + PortalKey: ids.MakePortalKey(msg.PeerID), + TargetMessage: ids.MakeMessageID(msg.ID), + Data: msg, + Timestamp: time.Unix(int64(msg.EditDate), 0), + ConvertEditFunc: func(ctx context.Context, portal *bridgev2.Portal, intent bridgev2.MatrixAPI, existing []*database.Message, data *tg.Message) (*bridgev2.ConvertedEdit, error) { + converted, err := t.convertToMatrix(ctx, portal, intent, msg) + if err != nil { + return nil, err + } else if len(existing) != len(converted.Parts) { + return nil, fmt.Errorf("parts were added or removed in edit") + } + + var ce bridgev2.ConvertedEdit + for i, part := range converted.Parts { + if !bytes.Equal(existing[i].Metadata.(*MessageMetadata).ContentHash, part.DBMetadata.(*MessageMetadata).ContentHash) { + ce.ModifiedParts = append(ce.ModifiedParts, part.ToEditPart(existing[i])) + } + } + return &ce, nil + }, }) return nil } -func (t *TelegramClient) convertEdit(ctx context.Context, portal *bridgev2.Portal, intent bridgev2.MatrixAPI, existing []*database.Message, msg *tg.Message) (*bridgev2.ConvertedEdit, error) { - converted, err := t.convertToMatrix(ctx, portal, intent, msg) - if err != nil { - return nil, err - } - - if len(existing) != len(converted.Parts) { - return nil, fmt.Errorf("parts were added or removed in edit") - } - - var ce bridgev2.ConvertedEdit - for i, part := range converted.Parts { - if bytes.Equal(existing[i].Metadata.(*MessageMetadata).ContentHash, part.DBMetadata.(*MessageMetadata).ContentHash) { - continue - } - - ce.ModifiedParts = append(ce.ModifiedParts, part.ToEditPart(existing[i])) - } - return &ce, nil -} - func (t *TelegramClient) handleTelegramReactions(ctx context.Context, msg *tg.Message) { log := zerolog.Ctx(ctx).With(). Str("handler", "handle_telegram_reactions"). @@ -436,22 +430,24 @@ func (t *TelegramClient) getReactionLimit(ctx context.Context, sender networkid. func (t *TelegramClient) transferEmojisToMatrix(ctx context.Context, customEmojiIDs []int64) (result map[networkid.EmojiID]string, err error) { result, customEmojiIDs = emojis.ConvertKnownEmojis(customEmojiIDs) - if len(customEmojiIDs) > 0 { - customEmojiDocuments, err := t.client.API().MessagesGetCustomEmojiDocuments(ctx, customEmojiIDs) + if len(customEmojiIDs) == 0 { + return + } + + customEmojiDocuments, err := t.client.API().MessagesGetCustomEmojiDocuments(ctx, customEmojiIDs) + if err != nil { + return nil, err + } + + for _, customEmojiDocument := range customEmojiDocuments { + mxcURI, _, _, err := media.NewTransferer(t.client.API()). + WithStickerConfig(t.main.Config.AnimatedSticker). + WithDocument(customEmojiDocument, false). + Transfer(ctx, t.main.Store, t.main.Bridge.Bot) if err != nil { return nil, err } - - for _, customEmojiDocument := range customEmojiDocuments { - mxcURI, _, _, err := media.NewTransferer(t.client.API()). - WithStickerConfig(t.main.Config.AnimatedSticker). - WithDocument(customEmojiDocument, false). - Transfer(ctx, t.main.Store, t.main.Bridge.Bot) - if err != nil { - return nil, err - } - result[ids.MakeEmojiIDFromDocumentID(customEmojiDocument.GetID())] = string(mxcURI) - } + result[ids.MakeEmojiIDFromDocumentID(customEmojiDocument.GetID())] = string(mxcURI) } return } diff --git a/pkg/connector/telegramfmt/convert.go b/pkg/connector/telegramfmt/convert.go new file mode 100644 index 00000000..6eecf50d --- /dev/null +++ b/pkg/connector/telegramfmt/convert.go @@ -0,0 +1,150 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge. +// Copyright (C) 2024 Sumner Evans +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package telegramfmt + +import ( + "context" + "fmt" + "html" + "strings" + + "github.com/gotd/td/tg" + "golang.org/x/exp/maps" + "maunium.net/go/mautrix/bridgev2/networkid" + "maunium.net/go/mautrix/event" + "maunium.net/go/mautrix/id" + + "go.mau.fi/mautrix-telegram/pkg/connector/ids" +) + +type UserInfo struct { + MXID id.UserID + Name string +} + +type FormatParams struct { + CustomEmojis map[networkid.EmojiID]string + GetUserInfo func(ctx context.Context, id networkid.UserID) (UserInfo, error) + NormalizeURL func(ctx context.Context, url string) string +} + +func (fp FormatParams) GetCustomEmoji(emojiID networkid.EmojiID) (string, id.ContentURIString) { + if strings.HasPrefix(fp.CustomEmojis[emojiID], "mxc://") { + return "", id.ContentURIString(fp.CustomEmojis[emojiID]) + } else { + return fp.CustomEmojis[emojiID], "" + } +} + +func (fp FormatParams) WithCustomEmojis(emojis map[networkid.EmojiID]string) FormatParams { + return FormatParams{ + CustomEmojis: emojis, + GetUserInfo: fp.GetUserInfo, + NormalizeURL: fp.NormalizeURL, + } +} + +type formatContext struct { + IsInCodeblock bool +} + +func (ctx formatContext) TextToHTML(text string) string { + if ctx.IsInCodeblock { + return html.EscapeString(text) + } + return event.TextToHTML(text) +} + +func Parse(ctx context.Context, message string, entities []tg.MessageEntityClass, params FormatParams) (*event.MessageEventContent, error) { + content := &event.MessageEventContent{ + MsgType: event.MsgText, + Body: message, + Mentions: &event.Mentions{}, + } + if len(entities) == 0 { + return content, nil + } + + lrt := &LinkedRangeTree{} + mentions := map[id.UserID]struct{}{} + utf16Message := NewUTF16String(message) + maxLength := len(utf16Message) + for _, e := range entities { + br := BodyRange{ + Start: e.GetOffset(), + Length: e.GetLength(), + }.TruncateEnd(maxLength) + switch entity := e.(type) { + case *tg.MessageEntityMention: + // TODO + fmt.Printf("mention = %+v\n", entity) + case *tg.MessageEntityHashtag: + br.Value = Style{Type: StyleHashtag} + case *tg.MessageEntityBotCommand: + br.Value = Style{Type: StyleBotCommand} + case *tg.MessageEntityURL: + br.Value = Style{Type: StyleURL, URL: params.NormalizeURL(ctx, utf16Message[e.GetOffset():e.GetOffset()+e.GetLength()].String())} + case *tg.MessageEntityEmail: + br.Value = Style{Type: StyleEmail} + case *tg.MessageEntityBold: + br.Value = Style{Type: StyleBold} + case *tg.MessageEntityItalic: + br.Value = Style{Type: StyleItalic} + case *tg.MessageEntityCode: + br.Value = Style{Type: StyleCode} + case *tg.MessageEntityPre: + br.Value = Style{Type: StylePre, Language: entity.Language} + case *tg.MessageEntityTextURL: + br.Value = Style{Type: StyleURL, URL: params.NormalizeURL(ctx, entity.URL)} + case *tg.MessageEntityMentionName: + userID := ids.MakeUserID(entity.UserID) + userInfo, err := params.GetUserInfo(ctx, userID) + if err != nil { + return nil, err + } + mentions[userInfo.MXID] = struct{}{} + br.Value = Mention{UserInfo: userInfo, UserID: userID} + case *tg.MessageEntityPhone: + br.Value = Style{Type: StylePhone} + case *tg.MessageEntityCashtag: + br.Value = Style{Type: StyleCashtag} + case *tg.MessageEntityUnderline: + br.Value = Style{Type: StyleUnderline} + case *tg.MessageEntityStrike: + br.Value = Style{Type: StyleStrikethrough} + case *tg.MessageEntityBankCard: + br.Value = Style{Type: StyleBankCard} + case *tg.MessageEntitySpoiler: + br.Value = Style{Type: StyleSpoiler} + case *tg.MessageEntityCustomEmoji: + emoji, contentURI := params.GetCustomEmoji(ids.MakeEmojiIDFromDocumentID(entity.DocumentID)) + if emoji != "" { + br.Value = Style{Type: StyleCustomEmoji, Emoji: emoji} + } else { + br.Value = Style{Type: StyleCustomEmoji, EmojiURI: contentURI} + } + case *tg.MessageEntityBlockquote: + br.Value = Style{Type: StyleBlockquote} + } + lrt.Add(br) + } + + content.Mentions.UserIDs = maps.Keys(mentions) + content.FormattedBody = lrt.Format(utf16Message, formatContext{}) + content.Format = event.FormatHTML + return content, nil +} diff --git a/pkg/connector/telegramfmt/convert_test.go b/pkg/connector/telegramfmt/convert_test.go new file mode 100644 index 00000000..ccff2abc --- /dev/null +++ b/pkg/connector/telegramfmt/convert_test.go @@ -0,0 +1,84 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge. +// Copyright (C) 2024 Sumner Evans +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package telegramfmt_test + +import ( + "context" + "fmt" + "testing" + + "github.com/gotd/td/tg" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "maunium.net/go/mautrix/bridgev2/networkid" + "maunium.net/go/mautrix/event" + "maunium.net/go/mautrix/id" + + "go.mau.fi/mautrix-telegram/pkg/connector/telegramfmt" +) + +func TestParse(t *testing.T) { + formatParams := telegramfmt.FormatParams{ + GetUserInfo: func(ctx context.Context, userID networkid.UserID) (telegramfmt.UserInfo, error) { + if userID == "real" { + return telegramfmt.UserInfo{ + MXID: "@test:example.com", + Name: "Matrix User", + }, nil + } else { + return telegramfmt.UserInfo{ + MXID: id.UserID(fmt.Sprintf("@telegram_%s:example.com", userID)), + Name: "Signal User", + }, nil + } + }, + } + tests := []struct { + name string + ins string + ine []tg.MessageEntityClass + body string + html string + + extraChecks func(*testing.T, *event.MessageEventContent) + }{ + { + name: "empty", + extraChecks: func(t *testing.T, content *event.MessageEventContent) { + assert.Empty(t, content.FormattedBody) + assert.Empty(t, content.Body) + }, + }, + { + name: "plain", + ins: "Hello world!", + body: "Hello world!", + extraChecks: func(t *testing.T, content *event.MessageEventContent) { + assert.Empty(t, content.FormattedBody) + assert.Empty(t, content.Format) + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + parsed, err := telegramfmt.Parse(context.TODO(), test.ins, test.ine, formatParams) + require.NoError(t, err) + assert.Equal(t, test.body, parsed.Body) + assert.Equal(t, test.html, parsed.FormattedBody) + }) + } +} diff --git a/pkg/connector/telegramfmt/html.go b/pkg/connector/telegramfmt/html.go new file mode 100644 index 00000000..5de093aa --- /dev/null +++ b/pkg/connector/telegramfmt/html.go @@ -0,0 +1,115 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge. +// Copyright (C) 2024 Sumner Evans +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package telegramfmt + +import ( + "fmt" + "strings" + "unicode/utf16" +) + +func (m Mention) Format(message string) string { + return fmt.Sprintf(`%s`, m.MXID.URI().MatrixToURL(), m.Name) +} + +func (s Style) Format(message string) string { + switch s.Type { + case StyleBold: + return fmt.Sprintf("%s", message) + case StyleItalic: + return fmt.Sprintf("%s", message) + case StyleSpoiler: + return fmt.Sprintf("%s", message) + case StyleStrikethrough: + return fmt.Sprintf("%s", message) + case StyleCode: + if strings.ContainsRune(message, '\n') { + // This is somewhat incorrect, as it won't allow inline text before/after a multiline monospace-formatted string. + return fmt.Sprintf("
%s
", message) + } + return fmt.Sprintf("%s", message) + case StyleUnderline: + return fmt.Sprintf("%s", message) + case StyleBlockquote: + return fmt.Sprintf("
%s
", message) + case StylePre: + if s.Language != "" { + return fmt.Sprintf("
%s
", s.Language, message) + } else { + return fmt.Sprintf("
%s
", message) + } + case StyleEmail: + return fmt.Sprintf(`%s`, message, message) + case StyleTextURL: + if strings.HasPrefix(s.URL, "https://matrix.to/#") { + return s.URL + } + return fmt.Sprintf(`%s`, s.URL, message) + case StyleURL: + if strings.HasPrefix(s.URL, "https://matrix.to/#") { + return s.URL + } + return fmt.Sprintf(`%s`, s.URL, message) + case StyleCustomEmoji: + if s.Emoji != "" { + return s.Emoji + } else { + return fmt.Sprintf( + `%s`, + s.EmojiURI, message, message, + ) + } + case StyleBotCommand: + return fmt.Sprintf("%s", message) + case StyleHashtag: + return fmt.Sprintf("%s", message) + case StyleCashtag: + return fmt.Sprintf("%s", message) + case StylePhone: + return fmt.Sprintf("%s", message) + default: + return message + } +} + +type UTF16String []uint16 + +func NewUTF16String(s string) UTF16String { + return utf16.Encode([]rune(s)) +} + +func (u UTF16String) String() string { + return string(utf16.Decode(u)) +} + +func (lrt *LinkedRangeTree) Format(message UTF16String, ctx formatContext) string { + if lrt == nil || lrt.Node == nil { + return ctx.TextToHTML(message.String()) + } + head := message[:lrt.Node.Start] + headStr := ctx.TextToHTML(head.String()) + inner := message[lrt.Node.Start:lrt.Node.End()] + tail := message[lrt.Node.End():] + ourCtx := ctx + if lrt.Node.Value.IsCode() { + ourCtx.IsInCodeblock = true + } + childMessage := lrt.Child.Format(inner, ourCtx) + formattedChildMessage := lrt.Node.Value.Format(childMessage) + siblingMessage := lrt.Sibling.Format(tail, ctx) + return headStr + formattedChildMessage + siblingMessage +} diff --git a/pkg/connector/telegramfmt/tags.go b/pkg/connector/telegramfmt/tags.go new file mode 100644 index 00000000..8e3f4d9b --- /dev/null +++ b/pkg/connector/telegramfmt/tags.go @@ -0,0 +1,139 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge. +// Copyright (C) 2024 Sumner Evans +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package telegramfmt + +import ( + "fmt" + + "maunium.net/go/mautrix/bridgev2/networkid" + "maunium.net/go/mautrix/id" +) + +type BodyRangeValue interface { + String() string + Format(message string) string + IsCode() bool +} + +type Mention struct { + UserInfo + UserID networkid.UserID +} + +var _ BodyRangeValue = Mention{} + +func (m Mention) String() string { + return fmt.Sprintf("Mention{MXID: id.UserID(%q), Name: %q}", m.MXID, m.Name) +} + +func (m Mention) IsCode() bool { + return false +} + +type StyleType int + +var _ BodyRangeValue = Mention{} + +const ( + StyleNone StyleType = iota + StyleBold + StyleItalic + StyleUnderline + StyleStrikethrough + StyleBlockquote + StyleCode + StylePre + StyleEmail + StyleTextURL + StyleURL + StyleCustomEmoji + StyleBotCommand + StyleHashtag + StyleCashtag + StylePhone + StyleSpoiler + StyleBankCard +) + +func (s StyleType) String() string { + switch s { + case StyleNone: + return "StyleNone" + case StyleBold: + return "StyleBold" + case StyleItalic: + return "StyleItalic" + case StyleUnderline: + return "StyleUnderline" + case StyleStrikethrough: + return "StyleStrikethrough" + case StyleBlockquote: + return "StyleBlockquote" + case StyleCode: + return "StyleCode" + case StylePre: + return "StylePre" + case StyleEmail: + return "StyleEmail" + case StyleTextURL: + return "StyleTextURL" + case StyleURL: + return "StyleEntityURL" + case StyleCustomEmoji: + return "StyleCustomEmoji" + case StyleBotCommand: + return "StyleBotCommand" + case StyleHashtag: + return "StyleHashtag" + case StyleCashtag: + return "StyleCashtag" + case StylePhone: + return "StylePhone" + case StyleSpoiler: + return "StyleSpoiler" + case StyleBankCard: + return "StyleBankCard" + default: + return fmt.Sprintf("StyleType(%d)", s) + } +} + +// Style represents a style to apply to a range of text. +type Style struct { + // Type is the type of style. + Type StyleType + + // Language is the language of the code block, if applicable. + Language string + + // URL is the URL to link to, if applicable. + URL string + + // Emoji is the emoji to display, if applicable. + Emoji string + + // EmojiURI is the URI to the emoji, if applicable. + EmojiURI id.ContentURIString +} + +func (s Style) String() string { + return fmt.Sprintf("Style{Type: %s, Language: %s, URL: %s}", s.Type, s.Language, s.URL) +} + +func (s Style) IsCode() bool { + return s.Type == StyleCode || s.Type == StylePre +} diff --git a/pkg/connector/telegramfmt/tree.go b/pkg/connector/telegramfmt/tree.go new file mode 100644 index 00000000..92e4d0a4 --- /dev/null +++ b/pkg/connector/telegramfmt/tree.go @@ -0,0 +1,113 @@ +// mautrix-telegram - A Matrix-Telegram puppeting bridge. +// Copyright (C) 2024 Sumner Evans +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package telegramfmt + +import ( + "fmt" + "sort" +) + +type BodyRange struct { + Start int + Length int + Value BodyRangeValue +} + +type BodyRangeList []BodyRange + +var _ sort.Interface = BodyRangeList(nil) + +func (b BodyRangeList) Len() int { + return len(b) +} + +func (b BodyRangeList) Less(i, j int) bool { + return b[i].Start < b[j].Start || b[i].Length > b[j].Length +} + +func (b BodyRangeList) Swap(i, j int) { + b[i], b[j] = b[j], b[i] +} + +func (b BodyRange) String() string { + return fmt.Sprintf("%d:%d:%v", b.Start, b.Length, b.Value) +} + +// End returns the end index of the range. +func (b BodyRange) End() int { + return b.Start + b.Length +} + +// Offset changes the start of the range without affecting the length. +func (b BodyRange) Offset(offset int) *BodyRange { + b.Start += offset + return &b +} + +// TruncateStart changes the length of the range, so it starts at the given +// index and ends at the same index as before. +func (b BodyRange) TruncateStart(startAt int) *BodyRange { + if b.Start < startAt { + b.Length -= startAt - b.Start + b.Start = startAt + } + return &b +} + +// TruncateEnd changes the length of the range, so it ends at or before the +// given index and starts at the same index as before. +func (b BodyRange) TruncateEnd(maxEnd int) *BodyRange { + if b.End() > maxEnd { + b.Length = maxEnd - b.Start + } + return &b +} + +// LinkedRangeTree is a linked tree of formatting entities. +// +// It's meant to parse a list of Telegram entity ranges into nodes that either +// overlap completely or not at all, which enables more natural conversion to +// HTML. +type LinkedRangeTree struct { + Node *BodyRange + Sibling *LinkedRangeTree + Child *LinkedRangeTree +} + +func ptrAdd(to **LinkedRangeTree, r *BodyRange) { + if *to == nil { + *to = &LinkedRangeTree{} + } + (*to).Add(r) +} + +// Add adds the given formatting entity to this tree. +func (lrt *LinkedRangeTree) Add(r *BodyRange) { + if lrt.Node == nil { + lrt.Node = r + return + } + lrtEnd := lrt.Node.End() + if r.Start >= lrtEnd { + ptrAdd(&lrt.Sibling, r.Offset(-lrtEnd)) + return + } + if r.End() > lrtEnd { + ptrAdd(&lrt.Sibling, r.TruncateStart(lrtEnd).Offset(-lrtEnd)) + } + ptrAdd(&lrt.Child, r.TruncateEnd(lrtEnd).Offset(-lrt.Node.Start)) +} diff --git a/pkg/connector/tomatrix.go b/pkg/connector/tomatrix.go index 816bfc4b..36c81bd7 100644 --- a/pkg/connector/tomatrix.go +++ b/pkg/connector/tomatrix.go @@ -20,6 +20,7 @@ import ( "go.mau.fi/mautrix-telegram/pkg/connector/ids" "go.mau.fi/mautrix-telegram/pkg/connector/media" + "go.mau.fi/mautrix-telegram/pkg/connector/telegramfmt" "go.mau.fi/mautrix-telegram/pkg/connector/util" "go.mau.fi/mautrix-telegram/pkg/connector/waveform" ) @@ -97,35 +98,33 @@ func (c *TelegramClient) mediaToMatrix(ctx context.Context, portal *bridgev2.Por } } -func (c *TelegramClient) convertToMatrix(ctx context.Context, portal *bridgev2.Portal, intent bridgev2.MatrixAPI, msg *tg.Message) (*bridgev2.ConvertedMessage, error) { +func (c *TelegramClient) convertToMatrix(ctx context.Context, portal *bridgev2.Portal, intent bridgev2.MatrixAPI, msg *tg.Message) (cm *bridgev2.ConvertedMessage, err error) { log := zerolog.Ctx(ctx).With().Str("conversion_direction", "to_matrix").Logger() ctx = log.WithContext(ctx) - cm := &bridgev2.ConvertedMessage{} + cm = &bridgev2.ConvertedMessage{} hasher := sha256.New() if len(msg.Message) > 0 { - var linkPreviews []*event.BeeperLinkPreview + hasher.Write([]byte(msg.Message)) + + content, err := c.parseBodyAndHTML(ctx, msg.Message, msg.Entities) + if err != nil { + return nil, err + } if media, ok := msg.GetMedia(); ok && media.TypeID() == tg.MessageMediaWebPageTypeID { preview, err := c.webpageToBeeperLinkPreview(ctx, intent, media) if err != nil { - return nil, err + log.Err(err).Msg("error converting webpage to link preview") } else if preview != nil { - linkPreviews = append(linkPreviews, preview) + content.BeeperLinkPreviews = append(content.BeeperLinkPreviews, preview) } } - hasher.Write([]byte(msg.Message)) - - // TODO formatting cm.Parts = []*bridgev2.ConvertedMessagePart{ { - ID: networkid.PartID("caption"), - Type: event.EventMessage, - Content: &event.MessageEventContent{ - MsgType: event.MsgText, - Body: msg.Message, - BeeperLinkPreviews: linkPreviews, - }, + ID: networkid.PartID("caption"), + Type: event.EventMessage, + Content: content, }, } } @@ -153,7 +152,38 @@ func (c *TelegramClient) convertToMatrix(ctx context.Context, portal *bridgev2.P ContentURI: contentURI, } - return cm, nil + if replyTo, ok := msg.GetReplyTo(); ok { + switch replyTo := replyTo.(type) { + case *tg.MessageReplyHeader: + cm.ReplyTo = &networkid.MessageOptionalPartID{ + MessageID: ids.MakeMessageID(replyTo.ReplyToMsgID), + } + default: + log.Warn().Type("reply_to", replyTo).Msg("unhandled reply to type") + } + } + + return +} + +func (t *TelegramClient) parseBodyAndHTML(ctx context.Context, message string, entities []tg.MessageEntityClass) (*event.MessageEventContent, error) { + if len(entities) == 0 { + return &event.MessageEventContent{MsgType: event.MsgText, Body: message}, nil + } + + var customEmojiIDs []int64 + for _, entity := range entities { + switch entity := entity.(type) { + case *tg.MessageEntityCustomEmoji: + customEmojiIDs = append(customEmojiIDs, entity.DocumentID) + } + } + customEmojis, err := t.transferEmojisToMatrix(ctx, customEmojiIDs) + if err != nil { + return nil, err + } + fmt.Printf("ce %+v\n", customEmojis) // TODO DEBUG + return telegramfmt.Parse(ctx, message, entities, t.telegramFmtParams.WithCustomEmojis(customEmojis)) } func (c *TelegramClient) webpageToBeeperLinkPreview(ctx context.Context, intent bridgev2.MatrixAPI, msgMedia tg.MessageMediaClass) (preview *event.BeeperLinkPreview, err error) {