test_pii_redaction.py
"""Tests for PII redaction in gateway session context prompts."""

from gateway.session import (
    SessionContext,
    SessionSource,
    build_session_context_prompt,
    _hash_id,
    _hash_sender_id,
    _hash_chat_id,
)
from gateway.config import Platform, HomeChannel


# ---------------------------------------------------------------------------
# Low-level helpers
# ---------------------------------------------------------------------------

class TestHashHelpers:
    """Unit checks for the ID-hashing helpers imported from gateway.session."""

    def test_hash_id_deterministic(self):
        """Hashing the same input twice yields the same value."""
        first = _hash_id("12345")
        second = _hash_id("12345")
        assert first == second

    def test_hash_id_12_hex_chars(self):
        """The hash is exactly 12 lowercase hexadecimal characters."""
        hex_alphabet = "0123456789abcdef"
        digest = _hash_id("user-abc")
        assert len(digest) == 12
        assert all(ch in hex_alphabet for ch in digest)

    def test_hash_sender_id_prefix(self):
        """Hashed sender IDs carry a ``user_`` prefix before the hash."""
        hashed = _hash_sender_id("12345")
        assert hashed.startswith("user_")
        assert len(hashed) == 17  # "user_" + 12

    def test_hash_chat_id_preserves_prefix(self):
        """A ``platform:`` prefix survives hashing; the raw ID does not."""
        hashed = _hash_chat_id("telegram:12345")
        assert hashed.startswith("telegram:")
        assert "12345" not in hashed

    def test_hash_chat_id_no_prefix(self):
        """An unprefixed chat ID becomes a bare 12-character hash."""
        hashed = _hash_chat_id("12345")
        assert len(hashed) == 12
        assert "12345" not in hashed


# ---------------------------------------------------------------------------
# Integration: build_session_context_prompt
# ---------------------------------------------------------------------------

def _make_context(
    user_id="user-123",
    user_name=None,
    chat_id="telegram:99999",
    platform=Platform.TELEGRAM,
    home_channels=None,
):
    """Build a SessionContext around a single DM source.

    The given platform is used both for the source and as the only entry in
    ``connected_platforms``. A falsy ``home_channels`` becomes an empty dict.
    """
    dm_source = SessionSource(
        platform=platform,
        chat_id=chat_id,
        chat_type="dm",
        user_id=user_id,
        user_name=user_name,
    )
    channels = home_channels if home_channels else {}
    return SessionContext(
        source=dm_source,
        connected_platforms=[platform],
        home_channels=channels,
    )


def _telegram_home_channels():
    """Return a home-channel mapping with one Telegram entry named "Home Chat"."""
    return {
        Platform.TELEGRAM: HomeChannel(
            platform=Platform.TELEGRAM,
            chat_id="telegram:99999",
            name="Home Chat",
        )
    }


class TestBuildSessionContextPromptRedaction:
    """Prompt-level checks for the ``redact_pii`` flag."""

    def test_no_redaction_by_default(self):
        """Without the flag, the raw user ID shows up in the prompt."""
        context = _make_context(user_id="user-123")
        rendered = build_session_context_prompt(context)
        assert "user-123" in rendered

    def test_user_id_hashed_when_redact_pii(self):
        """With the flag, the raw user ID is replaced by a hashed one."""
        context = _make_context(user_id="user-123")
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "user-123" not in rendered
        assert "user_" in rendered  # hashed ID present

    def test_user_name_not_redacted(self):
        """Display names are left intact under redaction."""
        context = _make_context(user_id="user-123", user_name="Alice")
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "Alice" in rendered
        # user_id should not appear when user_name is present (name takes priority)
        assert "user-123" not in rendered

    def test_home_channel_id_hashed(self):
        """Home-channel IDs are hashed; their prefix and name remain."""
        context = _make_context(home_channels=_telegram_home_channels())
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "99999" not in rendered
        assert "telegram:" in rendered  # prefix preserved
        assert "Home Chat" in rendered  # name not redacted

    def test_home_channel_id_preserved_without_redaction(self):
        """With redaction off, the raw chat ID is present in the prompt."""
        context = _make_context(home_channels=_telegram_home_channels())
        rendered = build_session_context_prompt(context, redact_pii=False)
        assert "99999" in rendered

    def test_redaction_is_deterministic(self):
        """Two redacted renders of the same context are identical."""
        context = _make_context(user_id="+15551234567")
        first_render = build_session_context_prompt(context, redact_pii=True)
        second_render = build_session_context_prompt(context, redact_pii=True)
        assert first_render == second_render

    def test_different_ids_produce_different_hashes(self):
        """Contexts differing only in user ID render differently."""
        context_a = _make_context(user_id="user-A")
        context_b = _make_context(user_id="user-B")
        rendered_a = build_session_context_prompt(context_a, redact_pii=True)
        rendered_b = build_session_context_prompt(context_b, redact_pii=True)
        assert rendered_a != rendered_b

    def test_discord_ids_not_redacted_even_with_flag(self):
        """Discord needs real IDs for <@user_id> mentions."""
        context = _make_context(user_id="123456789", platform=Platform.DISCORD)
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "123456789" in rendered

    def test_whatsapp_ids_redacted(self):
        """WhatsApp user IDs (phone numbers) are hashed under redaction."""
        context = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP)
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "+15551234567" not in rendered
        assert "user_" in rendered

    def test_signal_ids_redacted(self):
        """Signal user IDs (phone numbers) are hashed under redaction."""
        context = _make_context(user_id="+15551234567", platform=Platform.SIGNAL)
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "+15551234567" not in rendered
        assert "user_" in rendered

    def test_slack_ids_not_redacted(self):
        """Slack may need IDs for mentions too."""
        context = _make_context(user_id="U12345ABC", platform=Platform.SLACK)
        rendered = build_session_context_prompt(context, redact_pii=True)
        assert "U12345ABC" in rendered