# test_telegram_mention_boundaries.py
"""Regression tests for Telegram bot mention detection (bug #12545).

The previous implementation used a naive substring check
(`f"@{bot_username}" in text.lower()`), which wrongly matched partial
substrings such as 'foo@hermes_bot.example'.

Detection now relies entirely on the MessageEntity objects Telegram's server
emits for real mentions. A bare `@username` substring in message text without
a corresponding `MENTION` entity is NOT a mention. That rule correctly ignores
@handles that appear inside URLs, code blocks, email-like strings, or quoted
text, because Telegram's parser does not emit mention entities for any of
those contexts.
"""
from types import SimpleNamespace

from gateway.config import Platform, PlatformConfig
from gateway.platforms.telegram import TelegramAdapter


def _make_adapter():
    """Return a TelegramAdapter stub whose bot is `@hermes_bot` with id 999.

    The instance is allocated with `object.__new__`, so `__init__` is never
    run; only the attributes assigned below are set by this helper.
    """
    stub = object.__new__(TelegramAdapter)
    stub.platform = Platform.TELEGRAM
    stub.config = PlatformConfig(enabled=True, token="***", extra={})
    stub._bot = SimpleNamespace(id=999, username="hermes_bot")
    return stub


def _mention_entity(text, mention="@hermes_bot"):
    """Build a MENTION entity covering the first literal `mention` in `text`.

    Raises ValueError (from `str.index`) when `mention` is not in `text`.
    """
    return SimpleNamespace(
        type="mention",
        offset=text.index(mention),
        length=len(mention),
    )


def _text_mention_entity(offset, length, user_id):
    """Build a TEXT_MENTION entity (used when the target user has no public @handle)."""
    target = SimpleNamespace(id=user_id)
    return SimpleNamespace(
        type="text_mention",
        offset=offset,
        length=length,
        user=target,
    )


def _message(text=None, caption=None, entities=None, caption_entities=None):
    """Build a minimal group-chat message stand-in.

    Missing (or empty) entity lists are replaced by a fresh empty list.
    """
    group_chat = SimpleNamespace(id=-100, type="group")
    return SimpleNamespace(
        text=text,
        caption=caption,
        entities=entities if entities else [],
        caption_entities=caption_entities if caption_entities else [],
        message_thread_id=None,
        chat=group_chat,
        reply_to_message=None,
    )


def _detects_mention(**message_fields):
    """Return `_message_mentions_bot`'s verdict for a message built from `message_fields`.

    A fresh adapter stub is created for every call, so tests share no state.
    """
    return _make_adapter()._message_mentions_bot(_message(**message_fields))


class TestRealMentionsAreDetected:
    """Genuine Telegram mentions carry a MENTION entity, and those must be detected."""

    def test_mention_at_start_of_message(self):
        body = "@hermes_bot hello world"
        assert _detects_mention(text=body, entities=[_mention_entity(body)]) is True

    def test_mention_mid_sentence(self):
        body = "hey @hermes_bot, can you help?"
        assert _detects_mention(text=body, entities=[_mention_entity(body)]) is True

    def test_mention_at_end_of_message(self):
        body = "thanks for looking @hermes_bot"
        assert _detects_mention(text=body, entities=[_mention_entity(body)]) is True

    def test_mention_in_caption(self):
        photo_caption = "photo for @hermes_bot"
        verdict = _detects_mention(
            caption=photo_caption,
            caption_entities=[_mention_entity(photo_caption)],
        )
        assert verdict is True

    def test_text_mention_entity_targets_bot(self):
        """TEXT_MENTION is Telegram's entity type for @FirstName -> user without a public handle."""
        # The entity's user id (999) matches the stub bot's id.
        entity = _text_mention_entity(4, 3, user_id=999)
        assert _detects_mention(text="hey you", entities=[entity]) is True


class TestSubstringFalsePositivesAreRejected:
    """A bare `@bot_username` substring with no MENTION entity must NOT match.

    Every input here made the OLD substring check return True incorrectly.
    A word-boundary regex would still over-match some of them (code blocks,
    URLs). Entity-based detection gets them all right because Telegram's
    parser does not emit mention entities for non-mention contexts.
    """

    def test_email_like_substring(self):
        """bug #12545 exact repro: 'foo@hermes_bot.example'."""
        assert _detects_mention(text="email me at foo@hermes_bot.example") is False

    def test_hostname_substring(self):
        assert _detects_mention(text="contact user@hermes_bot.domain.com") is False

    def test_superstring_username(self):
        """`@hermes_botx` is a different username; Telegram would emit a mention
        entity for `@hermes_botx`, not `@hermes_bot`."""
        assert _detects_mention(text="@hermes_botx hello") is False

    def test_underscore_suffix_substring(self):
        assert _detects_mention(text="see @hermes_bot_admin for help") is False

    def test_substring_inside_url_without_entity(self):
        """@handle inside a URL produces a URL entity, not a MENTION entity."""
        assert _detects_mention(text="see https://example.com/@hermes_bot for details") is False

    def test_substring_inside_code_block_without_entity(self):
        """Telegram doesn't emit mention entities inside code/pre entities."""
        assert _detects_mention(text="use the string `@hermes_bot` in config") is False

    def test_plain_text_with_no_at_sign(self):
        assert _detects_mention(text="just a normal group message") is False

    def test_email_substring_in_caption(self):
        assert _detects_mention(caption="foo@hermes_bot.example") is False


class TestEntityEdgeCases:
    """Malformed or mismatched entities must neither crash nor over-match."""

    def test_mention_entity_for_different_username(self):
        body = "@someone_else hi"
        other_mention = _mention_entity(body, mention="@someone_else")
        assert _detects_mention(text=body, entities=[other_mention]) is False

    def test_text_mention_entity_for_different_user(self):
        # The entity's user id (12345) differs from the stub bot's id (999).
        stranger = _text_mention_entity(0, 2, user_id=12345)
        assert _detects_mention(text="hi there", entities=[stranger]) is False

    def test_malformed_entity_with_negative_offset(self):
        broken = SimpleNamespace(type="mention", offset=-1, length=11)
        assert _detects_mention(text="@hermes_bot hi", entities=[broken]) is False

    def test_malformed_entity_with_zero_length(self):
        broken = SimpleNamespace(type="mention", offset=0, length=0)
        assert _detects_mention(text="@hermes_bot hi", entities=[broken]) is False


class TestCaseInsensitivity:
    """Telegram usernames are case-insensitive; the slice-compare normalizes both sides."""

    def test_uppercase_mention(self):
        body = "hi @HERMES_BOT"
        shouted = _mention_entity(body, mention="@HERMES_BOT")
        assert _detects_mention(text=body, entities=[shouted]) is True

    def test_mixed_case_mention(self):
        body = "hi @Hermes_Bot"
        mixed = _mention_entity(body, mention="@Hermes_Bot")
        assert _detects_mention(text=body, entities=[mixed]) is True