// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
//
// Only the named references "lt", "gt", "amp" and "quot" are recognized, plus
// decimal ("&#65;") and hexadecimal ("&#x41;") numeric references. The
// trailing ';' is optional. Anything that cannot be decoded is copied through
// unchanged: a malformed numeric reference yields just the '&' (the rest is
// left for the caller to copy), an unknown named reference is copied whole.
//
// The output is never longer than the consumed input, so decoding in place
// with dst <= src is safe.
//
// This is an adaptation of html.UnescapeString from the Go sources.
func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
	// Numeric references whose value is zero or >= maxCodePoint are rejected.
	// NOTE(review): U+10FFFF itself is rejected as well; this is kept from the
	// original code and presumably mirrors Telegram's own parser — confirm
	// before relaxing it.
	const maxCodePoint = 0x10ffff

	// i starts at 1 because we already know that s[0] == '&'.
	i, s := 1, b[src:]

	if len(s) <= 1 {
		// A lone '&' at the very end of the input.
		b[dst] = b[src]
		return dst + 1, src + 1
	}

	if s[i] == '#' {
		if len(s) <= 3 { // We need to have at least "&#.".
			b[dst] = b[src]
			return dst + 1, src + 1
		}
		i++

		hex := false
		base := rune(10)
		if c := s[i]; c == 'x' || c == 'X' {
			hex = true
			base = 16
			i++
		}

		var x rune
	digits:
		for i < len(s) {
			c := s[i]
			i++

			var d rune
			switch {
			case '0' <= c && c <= '9':
				d = rune(c - '0')
			case hex && 'a' <= c && c <= 'f':
				d = rune(c-'a') + 10
			case hex && 'A' <= c && c <= 'F':
				d = rune(c-'A') + 10
			default:
				// End of the digit run: swallow a terminating ';',
				// otherwise un-read the character.
				if c != ';' {
					i--
				}
				break digits
			}

			x = base*x + d
			// Saturate instead of letting the int32 accumulator wrap around:
			// without this, a long digit run such as "&#4294967361;" would
			// overflow and decode to an unrelated character. Once saturated
			// the value stays at maxCodePoint and is rejected below.
			if x > maxCodePoint {
				x = maxCodePoint
			}
		}

		// i <= 3 means no digit was matched at all.
		if i <= 3 || x == 0 || x >= maxCodePoint {
			b[dst] = b[src]
			return dst + 1, src + 1
		}

		// utf8.EncodeRune writes U+FFFD for values that are not valid runes
		// (e.g. surrogate halves).
		return dst + utf8.EncodeRune(b[dst:], x), src + i
	}

	// Consume the maximum number of characters possible, with the
	// consumed characters matching one of the named references.
	for i < len(s) {
		c := s[i]
		i++
		// Lower-cased characters are more common in entities, so we check for them first.
		if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
			continue
		}
		if c != ';' {
			i--
		}
		break
	}

	// Strip the optional terminating ';' from the name. i >= 1 here, and when
	// i == 1 the byte inspected is the leading '&', so the index is in range.
	nameEnd := i
	if s[nameEnd-1] == ';' {
		nameEnd--
	}

	var x rune
	switch string(s[1:nameEnd]) {
	case "lt":
		x = '<'
	case "gt":
		x = '>'
	case "amp":
		x = '&'
	case "quot":
		x = '"'
	}
	if x != 0 {
		return dst + utf8.EncodeRune(b[dst:], x), src + i
	}

	// Unknown reference: copy the consumed bytes through verbatim.
	copy(b[dst:dst+i], s[:i])
	return dst + i, src + i
}

// telegramUnescape implements Telegram BotAPI HTML unescape.
func telegramUnescape(b []byte) []byte { for i, c := range b { if c == '&' { dst, src := unescapeEntity(b, i, i) for src < len(b) { c := b[src] if c == '&' { dst, src = unescapeEntity(b, dst, src) } else { b[dst] = c dst, src = dst+1, src+1 } } return b[0:dst] } } return b }