From b568ef8d8ccea7a0b12d0cd89b3cf993a599026a Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Thu, 20 Jun 2024 10:30:39 -0600 Subject: [PATCH] media: support voice messages Signed-off-by: Sumner Evans --- go.mod | 5 +- go.sum | 4 +- pkg/connector/matrix.go | 32 +++++++-- pkg/connector/msgconv/tomatrix.go | 34 +++++++--- .../fuzz/FuzzRoundtrip/8727b16d337d7b81 | 2 + pkg/connector/waveform/waveform.go | 66 +++++++++++++++++++ pkg/connector/waveform/waveform_test.go | 46 +++++++++++++ 7 files changed, 173 insertions(+), 16 deletions(-) create mode 100644 pkg/connector/waveform/testdata/fuzz/FuzzRoundtrip/8727b16d337d7b81 create mode 100644 pkg/connector/waveform/waveform.go create mode 100644 pkg/connector/waveform/waveform_test.go diff --git a/go.mod b/go.mod index 731ec20a..fd8e6386 100644 --- a/go.mod +++ b/go.mod @@ -5,15 +5,17 @@ go 1.21 require ( github.com/gotd/td v0.102.0 github.com/rs/zerolog v1.33.0 + github.com/stretchr/testify v1.9.0 go.mau.fi/util v0.5.0 go.mau.fi/zerozap v0.1.1 go.uber.org/zap v1.27.0 - maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a + maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e ) require ( github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-faster/errors v0.7.1 // indirect github.com/go-faster/jx v1.1.0 // indirect github.com/go-faster/xor v1.0.0 // indirect @@ -26,6 +28,7 @@ require ( github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect github.com/mattn/go-sqlite3 v1.14.22 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rs/xid v1.5.0 // indirect github.com/segmentio/asm v1.2.0 // indirect github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e // indirect diff --git a/go.sum b/go.sum index 05290b7e..47f8c210 100644 --- a/go.sum +++ b/go.sum @@ -108,8 +108,8 @@ gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maunium.net/go/mauflag v1.0.0 h1:YiaRc0tEI3toYtJMRIfjP+jklH45uDHtT80nUamyD4M= maunium.net/go/mauflag v1.0.0/go.mod h1:nLivPOpTpHnpzEh8jEdSL9UqO9+/KBJFmNRlwKfkPeA= -maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a h1:g2X/TEW9MR9lfn4RUHUGcpta9FmFes62/4OEEVEKFJg= -maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a/go.mod h1:cxv1w6+syudmEpOewHYIQT9yO7TM5UOWmf6xEBVI4H4= +maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e h1:f5Y1xtNziKs7heoTNL5q86+dDG7kyJVNY3/C+WrefQg= +maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e/go.mod h1:cxv1w6+syudmEpOewHYIQT9yO7TM5UOWmf6xEBVI4H4= nhooyr.io/websocket v1.8.11 h1:f/qXNc2/3DpoSZkHt1DQu6rj4zGC8JmkkLkWss0MgN0= nhooyr.io/websocket v1.8.11/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c= rsc.io/qr v0.2.0 h1:6vBLea5/NRMVTz8V66gipeLycZMl/+UlFmk8DvqQ6WY= diff --git a/pkg/connector/matrix.go b/pkg/connector/matrix.go index 771e2447..d375cbc3 100644 --- a/pkg/connector/matrix.go +++ b/pkg/connector/matrix.go @@ -16,6 +16,7 @@ import ( "maunium.net/go/mautrix/event" "go.mau.fi/mautrix-telegram/pkg/connector/ids" + "go.mau.fi/mautrix-telegram/pkg/connector/waveform" ) func getMediaFilenameAndCaption(content *event.MessageEventContent) (filename, caption string) { @@ -48,7 +49,6 @@ func (t *TelegramClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2. case event.MsgImage, event.MsgFile, event.MsgAudio, event.MsgVideo: filename, caption := getMediaFilenameAndCaption(msg.Content) - // TODO stream this download straight into the uploader var fileData []byte fileData, err = t.main.Bridge.Bot.DownloadMedia(ctx, msg.Content.URL, msg.Content.File) if err != nil { @@ -65,13 +65,35 @@ func (t *TelegramClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2. // TODO resolver? 
styling = append(styling, html.String(nil, caption)) } + if msg.Content.MsgType == event.MsgImage { updates, err = builder.Media(ctx, message.UploadedPhoto(upload, styling...)) + break } else { - document := message.UploadedDocument(upload, styling...). - Filename(filename). - MIME(msg.Content.Info.MimeType) - updates, err = builder.Media(ctx, document) + document := message.UploadedDocument(upload, styling...).Filename(filename) + if msg.Content.Info != nil { + document.MIME(msg.Content.Info.MimeType) + } + + var media message.MediaOption + + switch msg.Content.MsgType { + case event.MsgAudio: + audioBuilder := document.Audio() + if msg.Content.MSC1767Audio != nil { + audioBuilder.Duration(time.Duration(msg.Content.MSC1767Audio.Duration) * time.Millisecond) + if len(msg.Content.MSC1767Audio.Waveform) > 0 { + audioBuilder.Waveform(waveform.Encode(msg.Content.MSC1767Audio.Waveform)) + } + } + if msg.Content.MSC3245Voice != nil { + audioBuilder.Voice() + } + media = audioBuilder + default: + media = document + } + updates, err = builder.Media(ctx, media) } default: return nil, fmt.Errorf("unsupported message type %s", msg.Content.MsgType) diff --git a/pkg/connector/msgconv/tomatrix.go b/pkg/connector/msgconv/tomatrix.go index 31012212..edff2a47 100644 --- a/pkg/connector/msgconv/tomatrix.go +++ b/pkg/connector/msgconv/tomatrix.go @@ -56,6 +56,8 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P var partID networkid.PartID var msgType event.MessageType var filename string + var audio *event.MSC1767Audio + var voice *event.MSC3245Voice // Determine the filename and some other information switch media := media.(type) { @@ -72,9 +74,22 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P } for _, attr := range document.GetAttributes() { - if f, ok := attr.(*tg.DocumentAttributeFilename); ok { - filename = f.GetFileName() - break + switch a := attr.(type) { + case *tg.DocumentAttributeFilename: + filename = 
a.GetFileName() + case *tg.DocumentAttributeAudio: + msgType = event.MsgAudio + audio = &event.MSC1767Audio{ + Duration: a.Duration * 1000, + } + if waveform, ok := a.GetWaveform(); ok { + for _, v := range waveform { + audio.Waveform = append(audio.Waveform, int(v)<<5) + } + } + if a.Voice { + voice = &event.MSC3245Voice{} + } } } @@ -170,12 +185,13 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P extra := map[string]any{} - // Handle spolilers + // Handle spoilers // See: https://github.com/matrix-org/matrix-spec-proposals/pull/3725 if s, ok := media.(spoilable); ok && s.GetSpoiler() { extra["town.robin.msc3725.content_warning"] = map[string]any{ "type": "town.robin.msc3725.spoiler", } + extra["fi.mau.telegram.spoiler"] = true } // Handle disappearing messages @@ -193,10 +209,12 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P ID: partID, Type: event.EventMessage, Content: &event.MessageEventContent{ - MsgType: msgType, - Body: filename, - URL: mxcURI, - File: encryptedFileInfo, + MsgType: msgType, + Body: filename, + URL: mxcURI, + File: encryptedFileInfo, + MSC1767Audio: audio, + MSC3245Voice: voice, }, Extra: extra, }, disappearingSetting, nil diff --git a/pkg/connector/waveform/testdata/fuzz/FuzzRoundtrip/8727b16d337d7b81 b/pkg/connector/waveform/testdata/fuzz/FuzzRoundtrip/8727b16d337d7b81 new file mode 100644 index 00000000..e8000f30 --- /dev/null +++ b/pkg/connector/waveform/testdata/fuzz/FuzzRoundtrip/8727b16d337d7b81 @@ -0,0 +1,2 @@ +go test fuzz v1 +[]byte("00") diff --git a/pkg/connector/waveform/waveform.go b/pkg/connector/waveform/waveform.go new file mode 100644 index 00000000..93bc5ad3 --- /dev/null +++ b/pkg/connector/waveform/waveform.go @@ -0,0 +1,66 @@ +// Package waveform implements encoding and decoding of a Telegram waveform. +// +// Telegram waveforms consist of packed 5-bit values. 
The values are packed +// into a byte stream, meaning that the actual values cross the byte boundary. +// +// The following diagram explains the format: +// +// [210|43210][0|43210|43][3210|4321][10|43210|4]... +// [111|00000][3|22222|11][4444|3333][66|55555|4]... +// +// Explanation of diagram: +// - The []'s enclose byte boundaries. +// - The |s represent separation between waveform values. +// - The numbers in the first row indicate the binary power. +// - The numbers in the second row indicate the corresponding waveform index. +package waveform + +import "math" + +// NormalizeWaveform converts a waveform to bytes: each value is divided by +// max(waveformMax/256, 1) and rounded. NOTE(review): Encode packs 5 bits per value, so values must end up in [0, 31] (not [0, 32]); confirm this scaling guarantees that. +func NormalizeWaveform(waveform []int) (normalized []byte) { + normalized = make([]byte, len(waveform)) + var waveformMax int + for _, v := range waveform { + waveformMax = max(waveformMax, v) + } + for i, v := range waveform { + normalized[i] = byte(math.Round(float64(v) / float64(max(waveformMax/256, 1)))) + } + return +} + +// Encode normalizes and encodes the input Matrix waveform into a Telegram +// waveform. The buffer is allocated one byte longer than the result so the spill-over write to index i*5/8+1 stays in range; that spare byte is sliced off on return. +func Encode(waveform []int) []byte { + bytesCount := (len(waveform)*5 + 7) / 8 + result := make([]byte, bytesCount+1) + + var bitShift int + for i, v := range NormalizeWaveform(waveform) { + result[i*5/8] |= v << bitShift + result[i*5/8+1] |= v >> (8 - bitShift) + bitShift = (bitShift + 5) % 8 + } + return result[:bytesCount] +} + +// Decode decodes a Telegram waveform into a waveform usable by Matrix. 
+func Decode(waveform []byte) []int { + numValues := len(waveform) * 8 / 5 + result := make([]int, numValues) + + var bitShift int + for i := 0; i < numValues; i++ { + var val byte + val |= waveform[i*5/8] >> bitShift + if i*5/8+1 < len(waveform) { + val |= waveform[i*5/8+1] << (8 - bitShift) + } + result[i] = int(val) & 0b00011111 + bitShift = (bitShift + 5) % 8 + } + + return result +} diff --git a/pkg/connector/waveform/waveform_test.go b/pkg/connector/waveform/waveform_test.go new file mode 100644 index 00000000..6faf82cf --- /dev/null +++ b/pkg/connector/waveform/waveform_test.go @@ -0,0 +1,46 @@ +package waveform_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "go.mau.fi/mautrix-telegram/pkg/connector/waveform" +) + +func TestEncode(t *testing.T) { + assert.Equal(t, []byte{0x01}, waveform.Encode([]int{1})) + assert.Equal(t, []byte{0xff, 0x03}, waveform.Encode([]int{31, 31})) + assert.Equal(t, []byte{0x41, 0x0c, 0x52, 0xcc, 0x41}, waveform.Encode([]int{1, 2, 3, 4, 5, 6, 7, 8})) + assert.Equal(t, []byte{0xff, 0xff, 0xff, 0xff, 0xff}, waveform.Encode([]int{31, 31, 31, 31, 31, 31, 31, 31})) +} + +func TestDecode(t *testing.T) { + // assert.Equal(t, []int{0x01}, waveform.Decode([]byte{1})) + // assert.Equal(t, []int{0x01, 0x10, 0x00}, waveform.Decode([]byte{1, 2})) + // assert.Equal(t, []int{0x01, 0x10, 0x00, 0x06, 0x00, 0x02, 0x14, 0x00}, waveform.Decode([]byte{1, 2, 3, 4, 5})) +} + +func FuzzRoundtrip(f *testing.F) { + f.Add([]byte{0x01}) + + f.Fuzz(func(t *testing.T, w []byte) { + wf := make([]int, len(w)) + for i, v := range waveform.NormalizeWaveform(wf) { // NOTE(review): wf is still all zeros here, so only len(w) affects the test; was w meant to be the input? + wf[i] = int(v) + } + encoded := waveform.Encode(wf) + decoded := waveform.Decode(encoded) + + // Sometimes, the decoded waveform might have an extra value if the + // last value of the encoded waveform is packed into the 3 + // least-significant bits of the last byte. 
In that case, it's unclear + // whether the waveform contains a 0b00000 as the last value or if there + // shouldn't have been anything there. + if len(wf) != len(decoded) { + assert.Len(t, decoded, len(wf)+1) + wf = append(wf, 0x00) + } + assert.Equal(t, wf, decoded) + }) +}