Cradicle Explorer

/ tests / test_rsa_validator.py
test_rsa_validator.py
  1  """RSA validator テスト
  2  
  3  _rsa_validator.pyの純粋な検証ロジックをテストする。
  4  DB/API不要でテスト可能。
  5  """
  6  
  7  from __future__ import annotations
  8  
  9  import pytest
 10  
 11  from mureo.google_ads._rsa_validator import (
 12      DESCRIPTION_MAX_WIDTH,
 13      HEADLINE_MAX_WIDTH,
 14      AdStrengthResult,
 15      RSAValidationResult,
 16      _bigram_similarity,
 17      _check_headline_diversity,
 18      _check_keyword_relevance,
 19      _check_prohibited,
 20      _has_synonym_overlap,
 21      _sanitize_text,
 22      _strip_match_type,
 23      display_width,
 24      predict_ad_strength,
 25      validate_rsa_texts,
 26  )
 27  
 28  
 29  # ---------------------------------------------------------------------------
 30  # display_width
 31  # ---------------------------------------------------------------------------
 32  
 33  
 34  @pytest.mark.unit
 35  class TestDisplayWidth:
 36      def test_半角英数(self) -> None:
 37          assert display_width("abc") == 3
 38  
 39      def test_全角文字(self) -> None:
 40          assert display_width("テスト") == 6
 41  
 42      def test_混在(self) -> None:
 43          assert display_width("ABCテスト") == 9
 44  
 45      def test_空文字(self) -> None:
 46          assert display_width("") == 0
 47  
 48      def test_見出し上限(self) -> None:
 49          assert HEADLINE_MAX_WIDTH == 30
 50  
 51      def test_説明文上限(self) -> None:
 52          assert DESCRIPTION_MAX_WIDTH == 90
 53  
 54  
 55  # ---------------------------------------------------------------------------
 56  # _sanitize_text
 57  # ---------------------------------------------------------------------------
 58  
 59  
 60  @pytest.mark.unit
 61  class TestSanitizeText:
 62      def test_連続感嘆符の縮約(self) -> None:
 63          text, fixes = _sanitize_text("素晴らしい！！！")
 64          assert text == "素晴らしい！"
 65          assert any("punctuation" in f.lower() for f in fixes)
 66  
 67      def test_連続疑問符の縮約(self) -> None:
 68          text, fixes = _sanitize_text("本当？？？")
 69          assert text == "本当？"
 70  
 71      def test_装飾記号の縮約(self) -> None:
 72          text, fixes = _sanitize_text("★★★限定")
 73          assert text == "★限定"
 74          assert any("decorative symbol" in f.lower() for f in fixes)
 75  
 76      def test_全角スペースの正規化(self) -> None:
 77          text, fixes = _sanitize_text("商品\u3000\u3000紹介")
 78          assert text == "商品 紹介"
 79          assert any("full-width space" in f.lower() for f in fixes)
 80  
 81      def test_先頭末尾記号の除去(self) -> None:
 82          text, fixes = _sanitize_text("！見出し。")
 83          assert text == "見出し"
 84          assert any("leading" in f.lower() for f in fixes)
 85  
 86      def test_半角カタカナの変換(self) -> None:
 87          text, fixes = _sanitize_text("ｷｰﾜｰﾄﾞ")
 88          assert text == "キーワード"
 89          assert any("half-width katakana" in f.lower() for f in fixes)
 90  
 91      def test_絵文字の除去(self) -> None:
 92          text, fixes = _sanitize_text("限定セール\U0001f525")
 93          assert text == "限定セール"
 94          assert any("emoji" in f.lower() for f in fixes)
 95  
 96      def test_正常テキストは修正なし(self) -> None:
 97          text, fixes = _sanitize_text("正常な広告テキスト")
 98          assert text == "正常な広告テキスト"
 99          assert fixes == []
100  
101      def test_超長テキストの切り詰め(self) -> None:
102          long_text = "あ" * 300
103          text, fixes = _sanitize_text(long_text)
104          assert len(text) <= 200
105          assert any("truncated" in f.lower() for f in fixes)
106  
107  
108  # ---------------------------------------------------------------------------
109  # _check_prohibited
110  # ---------------------------------------------------------------------------
111  
112  
@pytest.mark.unit
class TestCheckProhibited:
    """Checks for ``_check_prohibited``, which returns a list of warnings."""

    def test_最上級表現(self) -> None:
        """A superlative expression is reported in the warnings."""
        found = _check_prohibited("世界一の品質")
        assert any("世界一" in message for message in found)

    def test_価格系表現(self) -> None:
        """A lowest-price claim is reported in the warnings."""
        found = _check_prohibited("最安値で提供")
        assert any("最安値" in message for message in found)

    def test_効果保証表現(self) -> None:
        """An effect-guarantee claim is reported in the warnings."""
        found = _check_prohibited("効果保証付き")
        assert any("効果保証" in message for message in found)

    def test_医療系表現(self) -> None:
        """A medical cure claim is reported in the warnings."""
        found = _check_prohibited("これで治る")
        assert any("治る" in message for message in found)

    def test_クリックベイト(self) -> None:
        """A "click here" phrase yields a warning mentioning clickbait."""
        found = _check_prohibited("こちらをクリック")
        assert any("clickbait" in message.lower() for message in found)

    def test_正常テキスト(self) -> None:
        """Ordinary ad copy produces no warnings."""
        assert _check_prohibited("高品質な商品をお届け") == []
138  
139  
140  # ---------------------------------------------------------------------------
141  # validate_rsa_texts
142  # ---------------------------------------------------------------------------
143  
144  
@pytest.mark.unit
class TestValidateRsaTexts:
    """Checks for ``validate_rsa_texts`` (headlines, descriptions, final URL)."""

    def test_正常なRSAテキスト(self) -> None:
        """Valid input yields an ``RSAValidationResult`` keeping every text."""
        outcome = validate_rsa_texts(
            headlines=["見出し1", "見出し2", "見出し3"],
            descriptions=["説明文1です", "説明文2です"],
            final_url="https://example.com",
        )

        assert isinstance(outcome, RSAValidationResult)
        headline_count = len(outcome.headlines)
        description_count = len(outcome.descriptions)
        assert headline_count == 3
        assert description_count == 2

    def test_URLなしでValueError(self) -> None:
        """An empty final URL raises ``ValueError`` mentioning ``final_url``."""
        with pytest.raises(ValueError, match="final_url"):
            validate_rsa_texts(
                headlines=["見出し"],
                descriptions=["説明文"],
                final_url="",
            )

    def test_不正URLでValueError(self) -> None:
        """A malformed URL raises ``ValueError`` mentioning "Invalid URL"."""
        with pytest.raises(ValueError, match="Invalid URL"):
            validate_rsa_texts(
                headlines=["見出し"],
                descriptions=["説明文"],
                final_url="invalid-url",
            )

    def test_重複見出しの除去(self) -> None:
        """Duplicate headlines are dropped and a warning is emitted."""
        outcome = validate_rsa_texts(
            headlines=["同じ見出し", "同じ見出し", "別の見出し"],
            descriptions=["説明文"],
            final_url="https://example.com",
        )

        remaining = len(outcome.headlines)
        assert remaining == 2
        assert any("duplicate" in message for message in outcome.warnings)

    def test_文字幅超過でValueError(self) -> None:
        """A headline wider than the limit raises ``ValueError``."""
        # 20 full-width characters = width 40, which exceeds 30.
        too_wide = "あ" * 20
        with pytest.raises(ValueError, match="character limit"):
            validate_rsa_texts(
                headlines=[too_wide],
                descriptions=["説明文"],
                final_url="https://example.com",
            )

    def test_禁止表現で警告(self) -> None:
        """A prohibited expression in a headline surfaces as a warning."""
        outcome = validate_rsa_texts(
            headlines=["世界一の品質"],
            descriptions=["説明文です"],
            final_url="https://example.com",
        )

        assert any("世界一" in message for message in outcome.warnings)

    def test_サニタイズ適用(self) -> None:
        """Headlines in the result have been sanitized."""
        outcome = validate_rsa_texts(
            headlines=["素晴らしい！！！"],
            descriptions=["説明文です"],
            final_url="https://example.com",
        )

        first_headline = outcome.headlines[0]
        assert first_headline == "素晴らしい！"
210  
211  
212  # ---------------------------------------------------------------------------
213  # _bigram_similarity
214  # ---------------------------------------------------------------------------
215  
216  
@pytest.mark.unit
class TestBigramSimilarity:
    """Checks for ``_bigram_similarity`` on identical, disjoint and short input."""

    def test_同一文字列(self) -> None:
        """Identical strings score 1.0."""
        score = _bigram_similarity("テスト", "テスト")
        assert score == 1.0

    def test_完全に異なる文字列(self) -> None:
        """Strings sharing no characters score 0.0."""
        assert _bigram_similarity("あいう", "かきく") == 0.0

    def test_短い文字列(self) -> None:
        """Single-character strings score 1.0 when equal and 0.0 otherwise."""
        same = _bigram_similarity("a", "a")
        assert same == 1.0
        different = _bigram_similarity("a", "b")
        assert different == 0.0
229  
230  
231  # ---------------------------------------------------------------------------
232  # _has_synonym_overlap
233  # ---------------------------------------------------------------------------
234  
235  
@pytest.mark.unit
class TestHasSynonymOverlap:
    """Checks for ``_has_synonym_overlap`` between two texts."""

    def test_同義語ペア検出(self) -> None:
        """The two "cheap"-themed texts are expected to be flagged."""
        overlap = _has_synonym_overlap("安い商品", "格安セール")
        assert overlap is True

    def test_同義語なし(self) -> None:
        """Texts with no synonym pair are expected not to be flagged."""
        overlap = _has_synonym_overlap("高品質", "高速配送")
        assert overlap is False
243  
244  
245  # ---------------------------------------------------------------------------
246  # _check_headline_diversity
247  # ---------------------------------------------------------------------------
248  
249  
@pytest.mark.unit
class TestCheckHeadlineDiversity:
    """Checks for ``_check_headline_diversity``, which returns ``(score, msgs)``."""

    def test_多様な見出し(self) -> None:
        """Distinct headlines score above 0.5 and produce no messages."""
        diversity, notes = _check_headline_diversity(
            ["商品紹介", "お客様の声", "無料体験"]
        )
        assert diversity > 0.5
        assert notes == []

    def test_単一見出し(self) -> None:
        """A single headline scores 1.0."""
        # Only the score is checked here; the messages are ignored.
        diversity, _ = _check_headline_diversity(["テスト"])
        assert diversity == 1.0

    def test_同義語含む見出し(self) -> None:
        """Headlines containing synonyms score below 1.0 with a message."""
        diversity, notes = _check_headline_diversity(["安い商品", "格安セール"])
        assert diversity < 1.0
        assert len(notes) > 0
265  
266  
267  # ---------------------------------------------------------------------------
268  # _check_keyword_relevance
269  # ---------------------------------------------------------------------------
270  
271  
@pytest.mark.unit
class TestCheckKeywordRelevance:
    """Checks for ``_check_keyword_relevance``, which returns ``(score, missing)``."""

    def test_全キーワード含有(self) -> None:
        """When every keyword appears in the texts, the score is 1.0."""
        relevance, absent = _check_keyword_relevance(
            ["テスト商品の紹介"],
            ["テストの説明文"],
            ["テスト", "商品"],
        )
        assert relevance == 1.0
        assert absent == []

    def test_一部未含有(self) -> None:
        """With one of two keywords missing, the score is 0.5."""
        relevance, absent = _check_keyword_relevance(
            ["テスト"],
            ["説明文"],
            ["テスト", "未知のKW"],
        )
        assert relevance == 0.5
        assert "未知のKW" in absent

    def test_キーワードなし(self) -> None:
        """An empty keyword list is expected to score 0.5."""
        # Only the score is checked here; the missing list is ignored.
        relevance, _ = _check_keyword_relevance(
            ["テスト"],
            ["説明文"],
            [],
        )
        assert relevance == 0.5
299  
300  
301  # ---------------------------------------------------------------------------
302  # _strip_match_type
303  # ---------------------------------------------------------------------------
304  
305  
@pytest.mark.unit
class TestStripMatchType:
    """Checks for ``_strip_match_type`` on each keyword notation."""

    def test_フレーズマッチ(self) -> None:
        """Surrounding double quotes (phrase match) are removed."""
        bare = _strip_match_type('"テスト"')
        assert bare == "テスト"

    def test_完全一致(self) -> None:
        """Surrounding square brackets (exact match) are removed."""
        bare = _strip_match_type("[テスト]")
        assert bare == "テスト"

    def test_絞り込み部分一致(self) -> None:
        """A leading plus sign (broad match modifier) is removed."""
        bare = _strip_match_type("+テスト")
        assert bare == "テスト"

    def test_通常キーワード(self) -> None:
        """A keyword with no match-type notation is returned unchanged."""
        bare = _strip_match_type("テスト")
        assert bare == "テスト"
319  
320  
321  # ---------------------------------------------------------------------------
322  # predict_ad_strength
323  # ---------------------------------------------------------------------------
324  
325  
@pytest.mark.unit
class TestPredictAdStrength:
    """Checks for ``predict_ad_strength`` and the ``AdStrengthResult`` it returns."""

    def test_最小構成(self) -> None:
        """A minimal ad yields a well-formed result with six factors."""
        prediction = predict_ad_strength(
            headlines=["見出し1", "見出し2", "見出し3"],
            descriptions=["説明文1", "説明文2"],
        )

        assert isinstance(prediction, AdStrengthResult)
        known_levels = ("POOR", "AVERAGE", "GOOD", "EXCELLENT")
        assert prediction.level in known_levels
        assert 0.0 <= prediction.score <= 1.0
        factor_count = len(prediction.factors)
        assert factor_count == 6

    def test_最大構成でEXCELLENT(self) -> None:
        """A fully populated ad is rated GOOD or EXCELLENT, scoring >= 0.65."""
        prediction = predict_ad_strength(
            headlines=[f"見出し{i}" for i in range(15)],
            descriptions=[f"説明文{i}" for i in range(4)],
            keywords=["見出し0", "見出し1"],
            has_sitelinks=True,
            pinned_count=0,
        )

        top_levels = ("GOOD", "EXCELLENT")
        assert prediction.level in top_levels
        assert prediction.score >= 0.65

    def test_ピン留めでペナルティ(self) -> None:
        """Pinning three assets scores lower than the same ad unpinned."""
        unpinned = predict_ad_strength(
            headlines=["見出し1", "見出し2", "見出し3"],
            descriptions=["説明文1", "説明文2"],
            pinned_count=0,
        )
        with_pins = predict_ad_strength(
            headlines=["見出し1", "見出し2", "見出し3"],
            descriptions=["説明文1", "説明文2"],
            pinned_count=3,
        )

        assert with_pins.score < unpinned.score

    def test_サジェスチョン生成(self) -> None:
        """An ad with one headline and one description gets suggestions."""
        prediction = predict_ad_strength(
            headlines=["見出し1"],
            descriptions=["説明文1"],
        )

        suggestion_count = len(prediction.suggestions)
        assert suggestion_count > 0