move gotd fork into repo. (#111)
- update to latest telegram layer - remove some references to fields in tg.Entities that don't exist in the schema - originally added here: https://github.com/beeper/td/commit/820929062a2ba0104397bc01235ab58a9cff780e - referenced here - https://github.com/mautrix/telegramgo/commit/124f0967ed195b5a380c9bd02e170ada9710dde3 - https://github.com/mautrix/telegramgo/commit/4205047aab2e0639217148b5d125bfaab668bd8e
This commit is contained in:
@@ -0,0 +1,129 @@
|
||||
package html
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// unescapeEntity reads an entity like "<" from b[src:] and writes the
|
||||
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
|
||||
// Precondition: b[src] == '&' && dst <= src.
|
||||
//
|
||||
// This is adaption of html.UnescapeString from Go sources.
|
||||
func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
|
||||
// i starts at 1 because we already know that s[0] == '&'.
|
||||
i, s := 1, b[src:]
|
||||
|
||||
if len(s) <= 1 {
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if s[i] == '#' {
|
||||
if len(s) <= 3 { // We need to have at least "&#.".
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
i++
|
||||
c := s[i]
|
||||
hex := false
|
||||
if c == 'x' || c == 'X' {
|
||||
hex = true
|
||||
i++
|
||||
}
|
||||
|
||||
x := '\x00'
|
||||
for i < len(s) {
|
||||
c = s[i]
|
||||
i++
|
||||
if hex {
|
||||
switch {
|
||||
case '0' <= c && c <= '9':
|
||||
x = 16*x + rune(c) - '0'
|
||||
continue
|
||||
case 'a' <= c && c <= 'f':
|
||||
x = 16*x + rune(c) - 'a' + 10
|
||||
continue
|
||||
case 'A' <= c && c <= 'F':
|
||||
x = 16*x + rune(c) - 'A' + 10
|
||||
continue
|
||||
}
|
||||
} else if '0' <= c && c <= '9' {
|
||||
x = 10*x + rune(c) - '0'
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if i <= 3 { // No characters matched.
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if x == 0 || x >= 0x10ffff {
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
}
|
||||
|
||||
// Consume the maximum number of characters possible, with the
|
||||
// consumed characters matching one of the named references.
|
||||
|
||||
for i < len(s) {
|
||||
c := s[i]
|
||||
i++
|
||||
// Lower-cased characters are more common in entities, so we check for them first.
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
var x rune
|
||||
tagEnd := i
|
||||
if i > 0 && s[tagEnd-1] == ';' {
|
||||
tagEnd--
|
||||
}
|
||||
switch string(s[1:tagEnd]) {
|
||||
case "lt":
|
||||
x = '<'
|
||||
case "gt":
|
||||
x = '>'
|
||||
case "amp":
|
||||
x = '&'
|
||||
case "quot":
|
||||
x = '"'
|
||||
}
|
||||
if x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
}
|
||||
|
||||
dst1, src1 = dst+i, src+i
|
||||
copy(b[dst:dst1], b[src:src1])
|
||||
return dst1, src1
|
||||
}
|
||||
|
||||
// telegramEscape implements Telegram BotAPI HTML unescape.
|
||||
func telegramUnescape(b []byte) []byte {
|
||||
for i, c := range b {
|
||||
if c == '&' {
|
||||
dst, src := unescapeEntity(b, i, i)
|
||||
for src < len(b) {
|
||||
c := b[src]
|
||||
if c == '&' {
|
||||
dst, src = unescapeEntity(b, dst, src)
|
||||
} else {
|
||||
b[dst] = c
|
||||
dst, src = dst+1, src+1
|
||||
}
|
||||
}
|
||||
return b[0:dst]
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
Reference in New Issue
Block a user