From 0dc56aad1c912d6c33cc68479a834c8e40c0cffc Mon Sep 17 00:00:00 2001
From: Tulir Asokan
Date: Sat, 19 Sep 2020 01:04:34 +0300
Subject: [PATCH] Update prometheus stuff

---
Review notes (this section is ignored when the patch is applied):
- User.stop() now sets the bridge_connected metric to "false"; the patch as
  submitted set it to "true" after stopping the user.
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch; verify context-line whitespace before applying.

 mautrix_telegram/abstract_user.py | 21 ++++++++++-----------
 mautrix_telegram/matrix.py        | 12 ------------
 mautrix_telegram/user.py          | 15 +++++++++++++++
 requirements.txt                  |  2 +-
 4 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/mautrix_telegram/abstract_user.py b/mautrix_telegram/abstract_user.py
index 4db08fd2..980473ba 100644
--- a/mautrix_telegram/abstract_user.py
+++ b/mautrix_telegram/abstract_user.py
@@ -37,6 +37,7 @@ from mautrix.types import UserID, PresenceState
 from mautrix.errors import MatrixError
 from mautrix.appservice import AppService
 from mautrix.util.logging import TraceLogger
+from mautrix.util.opt_prometheus import Histogram, Counter
 from alchemysession import AlchemySessionContainer
 
 from . import portal as po, puppet as pu, __version__
@@ -57,14 +58,10 @@ UpdateMessage = Union[UpdateShortChatMessage, UpdateShortMessage, UpdateNewChann
                       UpdateNewMessage, UpdateEditMessage, UpdateEditChannelMessage]
 UpdateMessageContent = Union[UpdateShortMessage, UpdateShortChatMessage, Message, MessageService]
 
-try:
-    from prometheus_client import Histogram
-
-    UPDATE_TIME = Histogram("telegram_update", "Time spent processing Telegram updates",
-                            ["update_type"])
-except ImportError:
-    Histogram = None
-    UPDATE_TIME = None
+UPDATE_TIME = Histogram("bridge_telegram_update", "Time spent processing Telegram updates",
+                        ("update_type",))
+UPDATE_ERRORS = Counter("bridge_telegram_update_error",
+                        "Number of fatal errors while handling Telegram updates", ("update_type",))
 
 
 class AbstractUser(ABC):
@@ -191,13 +188,14 @@ class AbstractUser(ABC):
 
     async def _update_catch(self, update: TypeUpdate) -> None:
         start_time = time.time()
+        update_type = type(update).__name__
         try:
             if not await self.update(update):
                 await self._update(update)
         except Exception:
             self.log.exception(f"Failed to handle Telegram update {update}")
-        if UPDATE_TIME:
-            UPDATE_TIME.labels(update_type=type(update).__name__).observe(time.time() - start_time)
+            UPDATE_ERRORS.labels(update_type=update_type).inc()
+        UPDATE_TIME.labels(update_type=update_type).observe(time.time() - start_time)
 
     @property
     @abstractmethod
@@ -396,7 +394,8 @@ class AbstractUser(ABC):
                 portal = po.Portal.get_by_entity(update.to_id, receiver_id=self.tgid)
             sender = pu.Puppet.get(update.from_id) if update.from_id else None
         else:
-            self.log.warning(f"Unexpected message type in User#get_message_details: {type(update)}")
+            self.log.warning("Unexpected message type in User#get_message_details: "
+                             f"{type(update)}")
             return update, None, None
 
         return update, sender, portal
diff --git a/mautrix_telegram/matrix.py b/mautrix_telegram/matrix.py
index f9558006..626ed1fa 100644
--- a/mautrix_telegram/matrix.py
+++ b/mautrix_telegram/matrix.py
@@ -30,14 +30,6 @@ if TYPE_CHECKING:
     from .context import Context
     from .bot import Bot
 
-try:
-    from prometheus_client import Histogram
-
-    EVENT_TIME = Histogram("matrix_event", "Time spent processing Matrix events", ["event_type"])
-except ImportError:
-    Histogram = None
-    EVENT_TIME = None
-
 RoomMetaStateEventContent = Union[RoomNameStateEventContent, RoomAvatarStateEventContent,
                                   RoomTopicStateEventContent]
 
@@ -410,7 +402,3 @@ class MatrixHandler(BaseMatrixHandler):
         elif evt.type == EventType.ROOM_TOMBSTONE:
             await self.handle_room_upgrade(evt.room_id, evt.sender, evt.content.replacement_room,
                                            evt.event_id)
-
-    async def log_event_handle_duration(self, evt: Event, duration: float) -> None:
-        if EVENT_TIME:
-            EVENT_TIME.labels(event_type=str(evt.type)).observe(duration)
diff --git a/mautrix_telegram/user.py b/mautrix_telegram/user.py
index 90289eed..32254df9 100644
--- a/mautrix_telegram/user.py
+++ b/mautrix_telegram/user.py
@@ -31,6 +31,7 @@ from mautrix.errors import MatrixRequestError
 from mautrix.types import UserID, RoomID
 from mautrix.bridge import BaseUser
 from mautrix.util.logging import TraceLogger
+from mautrix.util.opt_prometheus import Enum
 
 from .types import TelegramID
 from .db import User as DBUser, Portal as DBPortal
@@ -45,6 +46,11 @@ config: Optional['Config'] = None
 
 SearchResult = NamedTuple('SearchResult', puppet='pu.Puppet', similarity=int)
 
+METRIC_LOGGED_IN = Enum('bridge_logged_in', 'Bridge Logged in', states=["true", "false"],
+                        labelnames=("tgid",))
+METRIC_CONNECTED = Enum('bridge_connected', 'Bridge Connected', states=["true", "false"],
+                        labelnames=("tgid",))
+
 
 class User(AbstractUser, BaseUser):
     log: TraceLogger = logging.getLogger("mau.user")
@@ -193,15 +199,21 @@ class User(AbstractUser, BaseUser):
 
     async def start(self, delete_unless_authenticated: bool = False) -> 'User':
         await super().start()
+        METRIC_CONNECTED.labels(tgid=self.tgid).state("true")
         if await self.is_logged_in():
             self.log.debug(f"Ensuring post_login() for {self.name}")
             asyncio.ensure_future(self.post_login(), loop=self.loop)
         elif delete_unless_authenticated:
             self.log.debug(f"Unauthenticated user {self.name} start()ed, deleting session...")
             await self.client.disconnect()
+            METRIC_CONNECTED.labels(tgid=self.tgid).state("false")
             self.client.session.delete()
         return self
 
+    async def stop(self) -> None:
+        await super().stop()
+        METRIC_CONNECTED.labels(tgid=self.tgid).state("false")
+
     async def post_login(self, info: TLUser = None, first_login: bool = False) -> None:
         try:
             await self.update_info(info)
@@ -209,6 +221,8 @@ class User(AbstractUser, BaseUser):
             self.log.exception("Failed to update telegram account info")
             return
 
+        METRIC_LOGGED_IN.labels(tgid=self.tgid).state("true")
+
         try:
             puppet = pu.Puppet.get(self.tgid)
             if puppet.custom_mxid != self.mxid and puppet.can_auto_login(self.mxid):
@@ -300,6 +314,7 @@ class User(AbstractUser, BaseUser):
         if not ok:
             return False
         self.delete()
+        METRIC_LOGGED_IN.labels(tgid=self.tgid).state("false")
         return True
 
     def _search_local(self, query: str, max_results: int = 5, min_similarity: int = 45
diff --git a/requirements.txt b/requirements.txt
index c04a5796..e9806a2a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,6 @@ ruamel.yaml>=0.15.35,<0.17
 python-magic>=0.4,<0.5
 commonmark>=0.8,<0.10
 aiohttp>=3,<4
-mautrix>=0.7.3,<0.8
+mautrix>=0.7.4,<0.8
 telethon>=1.16,<1.17
 telethon-session-sqlalchemy>=0.2.14,<0.3