/ mureo / google_ads / _intent_classifier.py
_intent_classifier.py
  1  """Search term intent classification.
  2  
  3  Provides data models, prompts, and parsers for search term intent classification.
  4  LLM dependency is removed in mureo-core; no LLM calls are made here.
  5  Batch classification via LLM should be done on the Managed side.
  6  """
  7  
  8  from __future__ import annotations
  9  
 10  import json
 11  import logging
 12  from dataclasses import dataclass
 13  from typing import Any
 14  
 15  logger = logging.getLogger(__name__)
 16  
 17  # Intent category constants
 18  INTENT_TRANSACTIONAL = "transactional"
 19  INTENT_COMMERCIAL = "commercial_investigation"
 20  INTENT_INFORMATIONAL = "informational"
 21  INTENT_NAVIGATIONAL = "navigational"
 22  
 23  VALID_INTENTS: frozenset[str] = frozenset(
 24      {
 25          INTENT_TRANSACTIONAL,
 26          INTENT_COMMERCIAL,
 27          INTENT_INFORMATIONAL,
 28          INTENT_NAVIGATIONAL,
 29      }
 30  )
 31  
 32  
 33  @dataclass(frozen=True)
 34  class SearchTermIntent:
 35      """Search term intent classification result."""
 36  
 37      search_term: str
 38      intent: (
 39          str  # transactional / commercial_investigation / informational / navigational
 40      )
 41      relevance_score: int  # 0-100: Relevance to advertiser business
 42      reasoning: str
 43      exclude_recommendation: bool  # Whether exclusion is recommended
 44  
 45  
# Prompt template (Japanese) for search-term intent classification.
# It is filled with str.format() and expects four placeholders:
# {campaign_name}, {keywords}, {strategic_context} and {search_terms}.
# The braces of the JSON example are doubled ({{ }}) so that str.format()
# emits them literally instead of treating them as placeholders.
# The text is sent to the LLM verbatim, so every character is runtime data.
_CLASSIFY_PROMPT = """\
あなたはリスティング広告の検索語句分析の専門家です。

## タスク
以下の検索語句それぞれについて、検索意図とビジネス関連度を分析してください。
ビジネスコンテキスト(特にペルソナとUSP)を参照し、
ターゲットユーザーの検索意図に合致するかどうかを判断してください。

## ビジネスコンテキスト
- キャンペーン名: {campaign_name}
- 登録キーワード: {keywords}
{strategic_context}

## 検索語句リスト
{search_terms}

## 分類ルール

### 検索意図(intent)
- **transactional**: 購入・申込・問合せなど行動意図がある(例: 「〇〇 申し込み」「〇〇 購入」)
- **commercial_investigation**: 購入前の比較検討段階(例: 「〇〇 おすすめ」「〇〇 口コミ」)
- **informational**: 情報収集が目的(例: 「〇〇 とは」「〇〇 仕組み」「〇〇 やり方」)
- **navigational**: 特定サイト・ブランドへの直接アクセス意図(例: 「〇〇 公式」「〇〇 ログイン」)

### ビジネス関連度(relevance_score: 0-100)
- 90-100: 直接的に商品・サービスに関連し、CVにつながる可能性が高い
- 60-89: 関連はあるがCV意図は弱い
- 30-59: 間接的に関連、CV可能性は低い
- 0-29: 無関係または競合ブランドへの遷移意図

### 除外推奨(exclude_recommendation)
以下の条件で true:
- informational かつ relevance_score < 40
- navigational かつ 自社ブランドでない
- relevance_score < 20(意図に関わらず無関係)

**重要:** ペルソナのニーズやUSPに関連する語句は、短期的なCPA悪化だけで除外推奨にしないこと。
ターゲットの検索意図に合致する語句は relevance_score を高く評価すること。

## 出力フォーマット(JSON配列)
```json
[
    {{
        "search_term": "検索語句",
        "intent": "transactional",
        "relevance_score": 85,
        "reasoning": "商品名を含み購入意図が明確",
        "exclude_recommendation": false
    }}
]
```

JSON配列のみを出力してください。"""
 99  
100  
class IntentClassifier:
    """Search term intent classifier (prompt building and response parsing).

    LLM dependency is removed in mureo-core; no LLM calls are made here.
    This class only generates the classification prompt and parses the text
    an LLM returned for it. Classification via LLM should be done on the
    Managed side.
    """

    @staticmethod
    def build_prompt(
        search_terms: list[str],
        campaign_name: str = "",
        keywords: list[str] | None = None,
        strategic_context: str | None = None,
    ) -> str:
        """Generate a prompt for LLM classification.

        The actual LLM call is performed on the Managed side.

        Args:
            search_terms: Search terms to classify; rendered as a numbered
                list starting at 1.
            campaign_name: Campaign name shown in the business context
                section. A "not specified" placeholder is used when empty.
            keywords: Registered keywords. Only the first 20 are included;
                a "not specified" placeholder is used when empty or None.
            strategic_context: Optional persona/USP text. A dedicated
                sub-section is emitted only when this is non-empty.

        Returns:
            The formatted prompt string.
        """
        terms_text = "\n".join(
            f"{number}. {term}" for number, term in enumerate(search_terms, start=1)
        )

        context_section = ""
        if strategic_context:
            context_section = f"\n### 戦略情報(ペルソナ・USP)\n{strategic_context}"

        return _CLASSIFY_PROMPT.format(
            campaign_name=campaign_name or "(未指定)",
            # Cap the keyword list so the prompt stays compact.
            keywords=", ".join(keywords[:20]) if keywords else "(未指定)",
            search_terms=terms_text,
            strategic_context=context_section,
        )

    @staticmethod
    def parse_response(
        content: str, original_terms: list[str]
    ) -> list[SearchTermIntent]:
        """Parse an LLM response into one result per original term.

        The response may be raw JSON or JSON wrapped in a Markdown code
        fence. Items are matched to ``original_terms`` by their
        ``search_term`` value, compared after stripping and lower-casing.
        Terms with no matching item, and fields with unusable values, fall
        back to defaults (intent ``informational``, relevance 50, no
        exclusion).

        Args:
            content: Raw text returned by the LLM.
            original_terms: The search terms that were sent for
                classification; the result preserves their order and text.

        Returns:
            A list with exactly one ``SearchTermIntent`` per entry of
            ``original_terms``. If the JSON cannot be decoded, every term
            receives the default classification and the decode error is
            recorded in ``reasoning``.
        """
        text = IntentClassifier._strip_code_fence(content)

        try:
            data = json.loads(text)
        except (json.JSONDecodeError, ValueError) as exc:
            logger.warning("Failed to parse intent classification JSON: %s", exc)
            return [
                SearchTermIntent(
                    search_term=t,
                    intent=INTENT_INFORMATIONAL,
                    relevance_score=50,
                    reasoning=f"Default classification due to parse failure: {exc}",
                    exclude_recommendation=False,
                )
                for t in original_terms
            ]

        # A single object is treated as a one-element array.
        if not isinstance(data, list):
            data = [data]

        # Match LLM output against original term list
        result_map: dict[str, dict[str, Any]] = {}
        for item in data:
            if not isinstance(item, dict):
                continue
            raw_term = item.get("search_term", "")
            if not isinstance(raw_term, str):
                # null / numeric search_term cannot be matched to a term;
                # skip it instead of failing on .strip().
                continue
            result_map[raw_term.strip().lower()] = item

        results: list[SearchTermIntent] = []
        for term in original_terms:
            item = result_map.get(term.strip().lower(), {})

            intent_raw = str(item.get("intent", INTENT_INFORMATIONAL)).strip().lower()
            intent = intent_raw if intent_raw in VALID_INTENTS else INTENT_INFORMATIONAL

            reasoning = item.get("reasoning")
            if reasoning is None:
                # Covers both a missing key and an explicit JSON null.
                reasoning = "No classification info"

            results.append(
                SearchTermIntent(
                    search_term=term,
                    intent=intent,
                    relevance_score=IntentClassifier._coerce_relevance(
                        item.get("relevance_score", 50)
                    ),
                    reasoning=str(reasoning),
                    exclude_recommendation=IntentClassifier._coerce_flag(
                        item.get("exclude_recommendation", False)
                    ),
                )
            )

        return results

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return the payload of the first Markdown code fence, if any."""
        text = content.strip()
        # Prefer an explicit ```json fence, then fall back to a bare fence.
        for fence in ("```json", "```"):
            if fence in text:
                return text.split(fence, 1)[1].split("```", 1)[0].strip()
        return text

    @staticmethod
    def _coerce_relevance(raw: Any, default: int = 50) -> int:
        """Convert a decoded JSON value to an int score clamped to 0-100."""
        if isinstance(raw, bool):
            # bool is a subclass of int, but true/false is not a score.
            return default
        if isinstance(raw, str):
            # Accept scores that were emitted as quoted numbers, e.g. "85".
            try:
                raw = float(raw.strip())
            except ValueError:
                return default
        if not isinstance(raw, (int, float)):
            return default
        if raw != raw:
            # NaN is the only value unequal to itself; json.loads accepts it
            # and int(NaN) would raise ValueError.
            return default
        # Clamp before int() so that +/-Infinity cannot raise OverflowError.
        return int(max(0, min(100, raw)))

    @staticmethod
    def _coerce_flag(raw: Any) -> bool:
        """Convert a decoded JSON value to bool, reading "false" as False."""
        if isinstance(raw, str):
            # bool("false") is True, so textual flags are compared instead.
            return raw.strip().lower() in {"true", "1", "yes"}
        return bool(raw)
193  
194          return results