/ tests / gateway / test_pii_redaction.py
test_pii_redaction.py
  1  """Tests for PII redaction in gateway session context prompts."""
  2  
  3  from gateway.session import (
  4      SessionContext,
  5      SessionSource,
  6      build_session_context_prompt,
  7      _hash_id,
  8      _hash_sender_id,
  9      _hash_chat_id,
 10  )
 11  from gateway.config import Platform, HomeChannel
 12  
 13  
 14  # ---------------------------------------------------------------------------
 15  # Low-level helpers
 16  # ---------------------------------------------------------------------------
 17  
 18  class TestHashHelpers:
 19      def test_hash_id_deterministic(self):
 20          assert _hash_id("12345") == _hash_id("12345")
 21  
 22      def test_hash_id_12_hex_chars(self):
 23          h = _hash_id("user-abc")
 24          assert len(h) == 12
 25          assert all(c in "0123456789abcdef" for c in h)
 26  
 27      def test_hash_sender_id_prefix(self):
 28          assert _hash_sender_id("12345").startswith("user_")
 29          assert len(_hash_sender_id("12345")) == 17  # "user_" + 12
 30  
 31      def test_hash_chat_id_preserves_prefix(self):
 32          result = _hash_chat_id("telegram:12345")
 33          assert result.startswith("telegram:")
 34          assert "12345" not in result
 35  
 36      def test_hash_chat_id_no_prefix(self):
 37          result = _hash_chat_id("12345")
 38          assert len(result) == 12
 39          assert "12345" not in result
 40  
 41  
 42  # ---------------------------------------------------------------------------
 43  # Integration: build_session_context_prompt
 44  # ---------------------------------------------------------------------------
 45  
 46  def _make_context(
 47      user_id="user-123",
 48      user_name=None,
 49      chat_id="telegram:99999",
 50      platform=Platform.TELEGRAM,
 51      home_channels=None,
 52  ):
 53      source = SessionSource(
 54          platform=platform,
 55          chat_id=chat_id,
 56          chat_type="dm",
 57          user_id=user_id,
 58          user_name=user_name,
 59      )
 60      return SessionContext(
 61          source=source,
 62          connected_platforms=[platform],
 63          home_channels=home_channels or {},
 64      )
 65  
 66  
 67  class TestBuildSessionContextPromptRedaction:
 68      def test_no_redaction_by_default(self):
 69          ctx = _make_context(user_id="user-123")
 70          prompt = build_session_context_prompt(ctx)
 71          assert "user-123" in prompt
 72  
 73      def test_user_id_hashed_when_redact_pii(self):
 74          ctx = _make_context(user_id="user-123")
 75          prompt = build_session_context_prompt(ctx, redact_pii=True)
 76          assert "user-123" not in prompt
 77          assert "user_" in prompt  # hashed ID present
 78  
 79      def test_user_name_not_redacted(self):
 80          ctx = _make_context(user_id="user-123", user_name="Alice")
 81          prompt = build_session_context_prompt(ctx, redact_pii=True)
 82          assert "Alice" in prompt
 83          # user_id should not appear when user_name is present (name takes priority)
 84          assert "user-123" not in prompt
 85  
 86      def test_home_channel_id_hashed(self):
 87          hc = {
 88              Platform.TELEGRAM: HomeChannel(
 89                  platform=Platform.TELEGRAM,
 90                  chat_id="telegram:99999",
 91                  name="Home Chat",
 92              )
 93          }
 94          ctx = _make_context(home_channels=hc)
 95          prompt = build_session_context_prompt(ctx, redact_pii=True)
 96          assert "99999" not in prompt
 97          assert "telegram:" in prompt  # prefix preserved
 98          assert "Home Chat" in prompt  # name not redacted
 99  
100      def test_home_channel_id_preserved_without_redaction(self):
101          hc = {
102              Platform.TELEGRAM: HomeChannel(
103                  platform=Platform.TELEGRAM,
104                  chat_id="telegram:99999",
105                  name="Home Chat",
106              )
107          }
108          ctx = _make_context(home_channels=hc)
109          prompt = build_session_context_prompt(ctx, redact_pii=False)
110          assert "99999" in prompt
111  
112      def test_redaction_is_deterministic(self):
113          ctx = _make_context(user_id="+15551234567")
114          prompt1 = build_session_context_prompt(ctx, redact_pii=True)
115          prompt2 = build_session_context_prompt(ctx, redact_pii=True)
116          assert prompt1 == prompt2
117  
118      def test_different_ids_produce_different_hashes(self):
119          ctx1 = _make_context(user_id="user-A")
120          ctx2 = _make_context(user_id="user-B")
121          p1 = build_session_context_prompt(ctx1, redact_pii=True)
122          p2 = build_session_context_prompt(ctx2, redact_pii=True)
123          assert p1 != p2
124  
125      def test_discord_ids_not_redacted_even_with_flag(self):
126          """Discord needs real IDs for <@user_id> mentions."""
127          ctx = _make_context(user_id="123456789", platform=Platform.DISCORD)
128          prompt = build_session_context_prompt(ctx, redact_pii=True)
129          assert "123456789" in prompt
130  
131      def test_whatsapp_ids_redacted(self):
132          ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP)
133          prompt = build_session_context_prompt(ctx, redact_pii=True)
134          assert "+15551234567" not in prompt
135          assert "user_" in prompt
136  
137      def test_signal_ids_redacted(self):
138          ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL)
139          prompt = build_session_context_prompt(ctx, redact_pii=True)
140          assert "+15551234567" not in prompt
141          assert "user_" in prompt
142  
143      def test_slack_ids_not_redacted(self):
144          """Slack may need IDs for mentions too."""
145          ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK)
146          prompt = build_session_context_prompt(ctx, redact_pii=True)
147          assert "U12345ABC" in prompt