Improve Telegram message deduplication
* Add pre-send message database check for deduplication * Make dedup cache queue length configurable
This commit is contained in:
@@ -196,6 +196,9 @@ class Config(DictWithRecursion):
|
||||
copy("bridge.catch_up")
|
||||
copy("bridge.sync_with_custom_puppets")
|
||||
|
||||
copy("bridge.deduplication.pre_db_check")
|
||||
copy("bridge.deduplication.cache_queue_length")
|
||||
|
||||
if "bridge.message_formats.m_text" in self:
|
||||
del self["bridge.message_formats"]
|
||||
copy_dict("bridge.message_formats", override_existing_map=False)
|
||||
|
||||
@@ -66,12 +66,19 @@ class Portal:
|
||||
az = None # type: AppService
|
||||
bot = None # type: Bot
|
||||
loop = None # type: asyncio.AbstractEventLoop
|
||||
|
||||
filter_mode = None # type: str
|
||||
filter_list = None # type: List[str]
|
||||
|
||||
bridge_notices = False # type: bool
|
||||
|
||||
dedup_pre_db_check = False # type: bool
|
||||
dedup_cache_queue_length = 20 # type: int
|
||||
|
||||
alias_template = None # type: str
|
||||
mx_alias_regex = None # type: Pattern
|
||||
hs_domain = None # type: str
|
||||
|
||||
by_mxid = {} # type: Dict[str, Portal]
|
||||
by_tgid = {} # type: Dict[Tuple[int, int], Portal]
|
||||
|
||||
@@ -191,7 +198,7 @@ class Portal:
|
||||
|
||||
self._dedup_action.append(evt_hash)
|
||||
|
||||
if len(self._dedup_action) > 20:
|
||||
if len(self._dedup_action) > self.dedup_cache_queue_length:
|
||||
self._dedup_action.popleft()
|
||||
return False
|
||||
|
||||
@@ -221,7 +228,7 @@ class Portal:
|
||||
self._dedup_mxid[evt_hash] = mxid
|
||||
self._dedup.append(evt_hash)
|
||||
|
||||
if len(self._dedup) > 20:
|
||||
if len(self._dedup) > self.dedup_cache_queue_length:
|
||||
del self._dedup_mxid[self._dedup.popleft()]
|
||||
return None
|
||||
|
||||
@@ -1330,6 +1337,8 @@ class Portal:
|
||||
|
||||
msg = DBMessage.query.get((evt.id, tg_space))
|
||||
if not msg:
|
||||
self.log.info(f"Didn't find edited message {evt.id}@{tg_space} (src {source.tgid}) "
|
||||
"in database.")
|
||||
# Oh crap
|
||||
return
|
||||
msg.mxid = mxid
|
||||
@@ -1363,6 +1372,15 @@ class Portal:
|
||||
self.db.commit()
|
||||
return
|
||||
|
||||
if self.dedup_pre_db_check and self.peer_type == "channel":
|
||||
msg = DBMessage.query.get((evt.id, tg_space))
|
||||
if msg:
|
||||
self.log.debug(f"Ignoring message {evt.id} (src {source.tgid}) as it was already"
|
||||
f"handled into {msg.mxid}. This duplicate was catched in the db "
|
||||
"check. If you get this message often, consider increasing"
|
||||
"bridge.deduplication.cache_queue_length in the config.")
|
||||
return
|
||||
|
||||
if sender and not sender.displayname:
|
||||
self.log.debug(f"Telegram user {sender.tgid} sent a message, but doesn't have a"
|
||||
"displayname, updating info...")
|
||||
@@ -1416,10 +1434,16 @@ class Portal:
|
||||
DBMessage.mxid == temporary_identifier) \
|
||||
.update({"mxid": mxid})
|
||||
except FlushError as e:
|
||||
self.log.exception(f"{e.__class__.__name__} while saving message mapping.")
|
||||
self.log.exception(f"{e.__class__.__name__} while saving message mapping. "
|
||||
"This might mean that an update was handled after it left the "
|
||||
"dedup cache queue. You can try enabling bridge.deduplication."
|
||||
"pre_db_check in the config.")
|
||||
await intent.redact(self.mxid, mxid)
|
||||
except (IntegrityError, InvalidRequestError) as e:
|
||||
self.log.exception(f"{e.__class__.__name__} while saving message mapping.")
|
||||
self.log.exception(f"{e.__class__.__name__} while saving message mapping. "
|
||||
"This might mean that an update was handled after it left the "
|
||||
"dedup cache queue. You can try enabling bridge.deduplication."
|
||||
"pre_db_check in the config.")
|
||||
self.db.rollback()
|
||||
await intent.redact(self.mxid, mxid)
|
||||
|
||||
@@ -1740,6 +1764,8 @@ def init(context: Context):
|
||||
Portal.bridge_notices = config["bridge.bridge_notices"]
|
||||
Portal.filter_mode = config["bridge.filter.mode"]
|
||||
Portal.filter_list = config["bridge.filter.list"]
|
||||
Portal.dedup_pre_db_check = config["bridge.deduplication.pre_db_check"]
|
||||
Portal.dedup_cache_queue_length = config["bridge.deduplication.cache_queue_length"]
|
||||
Portal.alias_template = config.get("bridge.alias_template", "telegram_{groupname}")
|
||||
Portal.hs_domain = config["homeserver.domain"]
|
||||
Portal.mx_alias_regex = re.compile(
|
||||
|
||||
Reference in New Issue
Block a user