Fix Matrix->Telegram formatting and add mention bridging

This commit is contained in:
Tulir Asokan
2017-12-01 18:57:52 +02:00
parent 0709e909bd
commit 04dc8ee3e9
5 changed files with 197 additions and 100 deletions
+6 -7
View File
@@ -65,12 +65,6 @@
"warn",
120
],
"no-underscore-dangle": [
"error",
{
"allowAfterThis": true
}
],
"no-unused-vars": [
"error",
{
@@ -143,6 +137,10 @@
"allowEmptyCatch": true
}
],
"no-cond-assign": [
"error",
"except-parens"
],
"function-paren-newline": "off",
"no-labels": "off",
"no-control-regex": "off",
@@ -170,6 +168,7 @@
"no-template-curly-in-string": "off",
"no-await-in-loop": "off",
"no-restricted-globals": "off",
"no-fallthrough": "off"
"no-fallthrough": "off",
"no-underscore-dangle": "off"
}
}
+2 -2
View File
@@ -41,7 +41,7 @@ If you don't know the MXID of the puppet, you can search for users using the `se
* Matrix → Telegram
* [x] Plaintext messages
* [x] Formatted messages
* [ ] Non-plaintext mentions
* [x] Mentions
* [x] Locations
* [ ] Images
* [ ] Files
@@ -55,7 +55,7 @@ If you don't know the MXID of the puppet, you can search for users using the `se
* Telegram → Matrix
* [x] Plaintext messages
* [x] Formatted messages
* [ ] Non-plaintext mentions
* [x] Mentions
* [x] Images
* [x] Locations
* [ ] Stickers (somewhat works through document upload, no preview though)
+5 -1
View File
@@ -159,6 +159,10 @@ class MautrixTelegram {
return this.config.bridge.username_template.replace("${ID}", id)
}
getMXIDForTelegramUser(id) {
return `@${this.getUsernameForTelegramUser(id)}:${this.config.homeserver.domain}`
}
/**
* Get the matrix.to link for the Matrix puppet of the Telegram user with the given ID.
*
@@ -166,7 +170,7 @@ class MautrixTelegram {
* @returns {string} A matrix.to link that points to the Matrix puppet of the given user.
*/
getMatrixToLinkForTelegramUser(id) {
return `https://matrix.to/#/@${this.getUsernameForTelegramUser(id)}:${this.config.homeserver.domain}`
return `https://matrix.to/#/${this.getMXIDForTelegramUser(id)}`
}
/**
+182 -88
View File
@@ -43,8 +43,9 @@ function addTag(tags, entity, tag, attrs, priority = 0) {
*
* @param {string} message The plaintext message.
* @param {Array} entities The Telegram formatting entities.
* @param {MautrixTelegram} app The app main class instance to use when reformatting mentions.
*/
function telegramToMatrix(message, entities) {
function telegramToMatrix(message, entities, app) {
const tags = []
// Decreasing priority counter used to ensure that formattings right next to each other don't flip like this:
// *bold*_italic_ --> <strong>bold<em></strong>italic</em>
@@ -52,7 +53,7 @@ function telegramToMatrix(message, entities) {
// Convert Telegram formatting entities into a weird custom indexed HTML tag format thingy.
for (const entity of entities) {
let url, tag
let url, tag, mxid
switch (entity._) {
case "messageEntityBold":
tag = tag || "strong"
@@ -72,9 +73,44 @@ function telegramToMatrix(message, entities) {
// TODO bridge bot commands differently?
addTag(tags, entity, "font", "color=\"blue\"", --pc)
break
case "messageEntityMentionName":
let user = app.matrixUsersByTelegramID.get(entity.user_id)
if (!user) {
// TODO this loop step should be made useless
for (const userByMXID of app.matrixUsersByID.values()) {
if (userByMXID.telegramUserID === entity.user_id) {
user = userByMXID
app.matrixUsersByTelegramID.set(userByMXID.telegramUserID, userByMXID)
break
}
}
}
mxid = user ?
user.userID :
app.getMXIDForTelegramUser(entity.user_id)
case "messageEntityMention":
// TODO bridge mentions properly?
addTag(tags, entity, "font", "color=\"red\"", --pc)
if (!mxid) {
const username = message.substr(entity.offset + 1, entity.length - 1)
for (const userByMXID of app.matrixUsersByID.values()) {
if (userByMXID._telegramPuppet && userByMXID._telegramPuppet.data.username === username) {
mxid = userByMXID.userID
break
}
}
if (!mxid) {
for (const userByID of app.telegramUsersByID.values()) {
if (userByID.username === username) {
mxid = userByID.mxid
break
}
}
}
}
if (!mxid) {
continue
}
addTag(tags, entity, "a", `href="https://matrix.to/#/${mxid}"`)
break
case "messageEntityEmail":
url = url || `mailto:${message.substr(entity.offset, entity.length)}`
@@ -101,22 +137,125 @@ function telegramToMatrix(message, entities) {
}
// Formatting that is converted back to text
const paragraphs = /<p>(.*?)<\/p>/g
const headers = /<h([0-6])>(.*?)<\/h[0-6]>/g
const unorderedLists = /<ul>((.|\n)*?)<\/ul>/g
const orderedLists = /<ol>((.|\n)*?)<\/ol>/g
const listEntries = /<li>(.*?)<\/li>/g
const linebreaks = /<br(.*?)>(\n)?/g
const paragraphs = /<p>([^]*?)<\/p>/g
const headers = /<h([0-6])>([^]*?)<\/h[0-6]>/g
const unorderedLists = /<ul>([^]*?)<\/ul>/g
const orderedLists = /<ol>([^]*?)<\/ol>/g
const listEntries = /<li>([^]*?)<\/li>/g
// Formatting that is converted to Telegram entity formatting
const boldText = /<strong>((.|\n)*?)<\/strong>/g
const italicText = /<em>((.|\n)*?)<\/em>/g
const codeblocks = /<pre><code>((.|\n)*?)<\/code><\/pre>/g
const codeblocksWithSyntaxHighlight = /<pre><code class="language-(.*?)">((.|\n)*?)<\/code><\/pre>/g
const inlineCode = /<code>(.*?)<\/code>/g
const emailAddresses = /<a href="mailto:(.*?)">((.|\n)*?)<\/a>/g
const hyperlinks = /<a href="(.*?)">((.|\n)*?)<\/a>/g
// Every entity pattern below exposes exactly REGEX_CAPTURE_GROUP_COUNT capture groups, always in this order:
//   1. an identifier naming the kind of tag that matched (e.g. "strong", "a href", "matrix.to"),
//   2. an argument for that tag (an empty group `()` when the tag has none),
//   3. the text wrapped by the tag.
// Keeping the layout uniform lets the patterns be joined into one alternation and decoded by position.
// `[^]` matches any character including newlines, so those bodies may span several lines.
const boldText = /<(strong)>()([^]*?)<\/strong>/g
const italicText = /<(em)>()([^]*?)<\/em>/g
const codeblocks = /<(pre><code)>()([^]*?)<\/code><\/pre>/g
// Argument: the language name taken from the `language-…` class.
const codeblocksWithSyntaxHighlight = /<(pre><code class)="language-(.*?)">([^]*?)<\/code><\/pre>/g
// Uses `.` rather than `[^]`, so inline code does not match across newlines.
const inlineCode = /<(code)>()(.*?)<\/code>/g
// Argument: the address after `mailto:`.
const emailAddresses = /<a href="(mailto):(.*?)">([^]*?)<\/a>/g
// Argument: everything after `https://matrix.to/#/`, which must start with `@`.
const mentions = /<a href="https:\/\/(matrix\.to)\/#\/(@.+?)">(.*?)<\/a>/g
// Argument: the link target.
const hyperlinks = /<(a href)="(.*?)">([^]*?)<\/a>/g
// Number of capture groups in each of the entity patterns above.
const REGEX_CAPTURE_GROUP_COUNT = 3

// Matches a <br> tag (with any attributes) plus one optional newline directly after it.
const linebreaks = /<br(.*?)>(\n)?/g
/**
 * Build a single regular expression that matches any one of the given regular expressions.
 *
 * Each pattern is wrapped in its own non-capturing group and the groups are joined with `|`.
 * Arguments that are not RegExp instances are ignored. If a regex carries a `_components` property,
 * that value is used in place of its `source`. Flags of the inputs are not carried over: the result
 * is always created without flags.
 *
 * @param {...RegExp} regexes The regular expressions to combine.
 * @returns {RegExp} A flagless regular expression of the form `(?:a)|(?:b)|…`.
 */
RegExp.any = function(...regexes) {
	const alternatives = regexes
		.filter(candidate => candidate instanceof RegExp)
		.reduce((collected, candidate) => collected.concat(candidate._components || candidate.source), [])
	const joined = alternatives.join(")|(?:")
	return new RegExp(`(?:${joined})`)
}
// One alternation of all entity patterns, so a single pass can find whichever tag comes first.
// Note that `//"g",` after the opening parenthesis is a line comment: no "g" argument is passed, and the
// combined regex has no flags, so String.replace() with it only replaces the first match.
// `mentions` is listed before `hyperlinks`; both start with `<a href="`, and the earlier alternative is tried first.
const regexMonster = RegExp.any(//"g",
boldText, italicText, codeblocks, codeblocksWithSyntaxHighlight,
inlineCode, emailAddresses, mentions, hyperlinks)
// Must equal the number of patterns passed to RegExp.any() above.
const NUMBER_OF_REGEXES_EATEN_BY_MONSTER = 8
/**
 * Decode the arguments of a String.replace() callback for the combined entity regex.
 *
 * The array is expected to hold every capture group of the combined regex, followed by the match offset
 * and the input string. The offset and the input string are popped off the given array (it is mutated).
 * The capture groups are then read in triplets of REGEX_CAPTURE_GROUP_COUNT: identifier, argument, text.
 *
 * @param {Array} match The replace() callback arguments after the full match.
 * @returns {{index: number, identifier: string, arg: string, text: string}} The match offset and the contents
 *          of the populated triplet. identifier, arg and text are undefined when no triplet is populated.
 */
function regexMonsterMatchParser(match) {
	// Trailing callback arguments, from the end: the input string, then the match offset.
	match.pop()
	const index = match.pop()

	const triplets = Array.from({ length: NUMBER_OF_REGEXES_EATEN_BY_MONSTER }, (_, slot) => {
		const start = slot * REGEX_CAPTURE_GROUP_COUNT
		return match.slice(start, start + REGEX_CAPTURE_GROUP_COUNT)
	})
	// A triplet counts as populated when its identifier group is truthy. Should more than one be populated,
	// the last one is used.
	const populated = triplets.filter(([name]) => name)
	const [identifier, arg, text] = populated.pop() || []
	return { index, identifier, arg, text }
}
/**
 * Turn one parsed HTML tag match into its plaintext replacement and, where applicable, a Telegram entity.
 *
 * @param {string} identifier The kind of tag that matched (first capture group of the entity patterns).
 * @param {string} arg        The tag argument: code language, e-mail address, link target or mentioned MXID.
 * @param {string} text       The text that was wrapped by the tag.
 * @param {number} index      The offset of the match in the message; used as the entity offset.
 * @param {Object} [app]      The app instance used to resolve mentions. It is read for `usernameRegex` and
 *                            `telegramUsersByID`. Without it, mentions are reduced to their text only.
 * @returns {{replacement: string, entity: (Object|undefined)}} The text to put in place of the tag and the
 *          entity describing it. entity is undefined for unknown identifiers and unresolvable mentions.
 */
function regexMonsterHandler(identifier, arg, text, index, app) {
	let replacement = text
	let entity, entityClass, argField
	switch (identifier) {
	case "strong":
		entityClass = "Bold"
		break
	case "em":
		entityClass = "Italic"
		break
	case "pre><code":
	case "pre><code class":
		// Plain code blocks carry an empty argument, so they get `language: ""`.
		argField = "language"
		entityClass = "Pre"
		break
	case "code":
		entityClass = "Code"
		break
	case "mailto":
		// The entity type is "messageEntityEmail"; the class name is case-sensitive.
		entityClass = "Email"
		// Force text to be the email address
		replacement = arg
		break
	case "a href":
		if (arg === text) {
			entityClass = "Url"
		} else {
			entityClass = "TextUrl"
			argField = "url"
		}
		// Ordinary links must not fall through into mention handling: a URL that happened to match
		// app.usernameRegex would otherwise be rewritten as a mention carrying a stray `url` field.
		break
	case "matrix.to": {
		if (!app) {
			break
		}
		const match = app.usernameRegex.exec(arg)
		if (!match || match.length < 2) {
			break
		}
		// The first capture group of usernameRegex is used as the Telegram user ID.
		const user = app.telegramUsersByID.get(+match[1])
		if (!user) {
			break
		}
		if (user.username) {
			// Users with a username are mentioned as plain @username text.
			entityClass = "Mention"
			replacement = `@${user.username}`
		} else {
			// Users without a username are mentioned by display name with an explicit user reference.
			replacement = user.getDisplayName()
			entity = {
				_: "inputMessageEntityMentionName",
				offset: index,
				length: replacement.length,
				user_id: {
					_: "inputUser",
					user_id: user.id,
				},
			}
		}
		break
	}
	}

	if (!entity && entityClass) {
		entity = {
			_: `messageEntity${entityClass}`,
			offset: index,
			length: replacement.length,
		}
		if (argField) {
			entity[argField] = arg
		}
	}
	return { replacement, entity }
}
/**
* Convert a Matrix HTML-formatted message to a Telegram entity-formatted message.
@@ -124,86 +263,41 @@ const linebreaks = /<br(.*?)>(\n)?/g
* @param {string} message The HTML-formatted message.
* @returns {{message: string, entities: Array}} The Telegram entity-formatted message.
*/
function matrixToTelegram(message) {
function matrixToTelegram(message, app) {
const entities = []
// First replace all the things that don't get converted into Telegram entities
message = message.replace(linebreaks, "\n")
message = message.replace(paragraphs, "$1\n")
message = message.replace(headers, (_, count, text) => `${"#".repeat(count)} ${text}`)
message = message.replace(unorderedLists, (_, list) => {
return list.replace(listEntries, "- $1")
})
message = message.replace(unorderedLists, (_, list) => list.replace(listEntries, "- $1"))
message = message.replace(orderedLists, (_, list) => {
let n = 0
return list.replace(listEntries, (fullMatch, text) => `${++n}. ${text}`)
})
message = message.replace(boldText, (_, text, index) => {
entities.push({
_: "messageEntityBold",
offset: index,
length: text.length,
})
return text
})
message = message.replace(italicText, (_, text, index) => {
entities.push({
_: "messageEntityItalic",
offset: index,
length: text.length,
})
return text
})
message = message.replace(codeblocks, (_, text, index) => {
entities.push({
_: "messageEntityPre",
offset: index,
length: text.length,
language: "",
})
return text
})
message = message.replace(codeblocksWithSyntaxHighlight, (_, language, text, index) => {
entities.push({
_: "messageEntityPre",
offset: index,
length: text.length,
language,
})
return text
})
message = message.replace(inlineCode, (_, text, index) => {
entities.push({
_: "messageEntityCode",
offset: index,
length: text.length,
})
return text
})
message = message.replace(emailAddresses, (_, address, text, index) => {
entities.push({
_: "messageEntityEmail",
offset: index,
length: address.length,
})
return address
})
message = message.replace(hyperlinks, (_, url, text, index) => {
if (url === text) {
entities.push({
_: "messageEntityUrl",
offset: index,
length: text.length,
})
} else {
entities.push({
_: "messageEntityTextUrl",
offset: index,
length: text.length,
url,
})
const regexMonsterReplacer = (match, ...args) => {
const { index, identifier, arg, text } = regexMonsterMatchParser(args)
if (!identifier) {
// This shouldn't happen
console.warn(`Warning: Match found but parsing failed for match "${match}"`)
return match
}
return text
})
console.log(entities)
const { replacement, entity } = regexMonsterHandler(identifier, arg, text, index, app)
if (entity) {
entities.push(entity)
}
return replacement || text
}
// We replace matches iteratively to make sure the indexes of matches are correct.
let oldMessage = message
message = message.replace(regexMonster, regexMonsterReplacer)
while (oldMessage !== message) {
oldMessage = message
message = message.replace(regexMonster, regexMonsterReplacer)
}
return { message, entities }
}
+2 -2
View File
@@ -218,7 +218,7 @@ class Portal {
if (evt.text && evt.text.length > 0) {
if (evt.entities) {
evt.html = formatter.telegramToMatrix(evt.text, evt.entities)
evt.html = formatter.telegramToMatrix(evt.text, evt.entities, this.app)
sender.sendHTML(this.roomID, evt.html)
} else {
sender.sendText(this.roomID, evt.text)
@@ -252,7 +252,7 @@ class Portal {
switch (evt.content.msgtype) {
case "m.text":
if (evt.content.format === "org.matrix.custom.html") {
const { message, entities } = formatter.matrixToTelegram(evt.content.formatted_body)
const { message, entities } = formatter.matrixToTelegram(evt.content.formatted_body, this.app)
await sender.telegramPuppet.sendMessage(this.peer, message, entities)
} else {
await sender.telegramPuppet.sendMessage(this.peer, evt.content.body)