_intent_classifier.py
"""Search term intent classification.

Provides data models, prompts, and parsers for search term intent classification.
LLM dependency is removed in mureo-core; no LLM calls are made here.
Batch classification via LLM should be done on the Managed side.
"""

from __future__ import annotations

import json
import logging
import math
import re
from dataclasses import dataclass
from typing import Any

logger = logging.getLogger(__name__)

# Intent category constants
INTENT_TRANSACTIONAL = "transactional"
INTENT_COMMERCIAL = "commercial_investigation"
INTENT_INFORMATIONAL = "informational"
INTENT_NAVIGATIONAL = "navigational"

VALID_INTENTS: frozenset[str] = frozenset(
    {
        INTENT_TRANSACTIONAL,
        INTENT_COMMERCIAL,
        INTENT_INFORMATIONAL,
        INTENT_NAVIGATIONAL,
    }
)

# Fallback values used when the LLM output is missing or malformed.
_DEFAULT_RELEVANCE = 50
_DEFAULT_REASONING = "No classification info"

# A fenced block tagged "json" (any letter case); an unterminated fence runs
# to the end of the text.
_FENCED_JSON_RE = re.compile(r"```json(.*?)(?:```|\Z)", re.DOTALL | re.IGNORECASE)
# Any fenced block, used only when no json-tagged fence is present.
_FENCED_ANY_RE = re.compile(r"```(.*?)(?:```|\Z)", re.DOTALL)

# String spellings accepted as a true ``exclude_recommendation`` flag.
_TRUTHY_STRINGS: frozenset[str] = frozenset({"true", "1", "yes"})


@dataclass(frozen=True)
class SearchTermIntent:
    """Search term intent classification result."""

    search_term: str
    # transactional / commercial_investigation / informational / navigational
    intent: str
    relevance_score: int  # 0-100: Relevance to advertiser business
    reasoning: str
    exclude_recommendation: bool  # Whether exclusion is recommended


_CLASSIFY_PROMPT = """\
あなたはリスティング広告の検索語句分析の専門家です。

## タスク
以下の検索語句それぞれについて、検索意図とビジネス関連度を分析してください。
ビジネスコンテキスト(特にペルソナとUSP)を参照し、
ターゲットユーザーの検索意図に合致するかどうかを判断してください。

## ビジネスコンテキスト
- キャンペーン名: {campaign_name}
- 登録キーワード: {keywords}
{strategic_context}

## 検索語句リスト
{search_terms}

## 分類ルール

### 検索意図(intent)
- **transactional**: 購入・申込・問合せなど行動意図がある(例: 「〇〇 申し込み」「〇〇 購入」)
- **commercial_investigation**: 購入前の比較検討段階(例: 「〇〇 おすすめ」「〇〇 口コミ」)
- **informational**: 情報収集が目的(例: 「〇〇 とは」「〇〇 仕組み」「〇〇 やり方」)
- **navigational**: 特定サイト・ブランドへの直接アクセス意図(例: 「〇〇 公式」「〇〇 ログイン」)

### ビジネス関連度(relevance_score: 0-100)
- 90-100: 直接的に商品・サービスに関連し、CVにつながる可能性が高い
- 60-89: 関連はあるがCV意図は弱い
- 30-59: 間接的に関連、CV可能性は低い
- 0-29: 無関係または競合ブランドへの遷移意図

### 除外推奨(exclude_recommendation)
以下の条件で true:
- informational かつ relevance_score < 40
- navigational かつ 自社ブランドでない
- relevance_score < 20(意図に関わらず無関係)

**重要:** ペルソナのニーズやUSPに関連する語句は、短期的なCPA悪化だけで除外推奨にしないこと。
ターゲットの検索意図に合致する語句は relevance_score を高く評価すること。

## 出力フォーマット(JSON配列)
```json
[
  {{
    "search_term": "検索語句",
    "intent": "transactional",
    "relevance_score": 85,
    "reasoning": "商品名を含み購入意図が明確",
    "exclude_recommendation": false
  }}
]
```

JSON配列のみを出力してください。"""


def _extract_json_text(content: str) -> str:
    """Return the JSON payload of *content*, unwrapping a Markdown code fence."""
    text = content.strip()
    # Prefer a json-tagged fence; fall back to the first fence of any kind.
    for pattern in (_FENCED_JSON_RE, _FENCED_ANY_RE):
        match = pattern.search(text)
        if match is not None:
            return match.group(1).strip()
    return text


def _normalize_term(value: Any) -> str:
    """Return the lookup key for a search term (stripped, lower-cased)."""
    # The LLM may emit null or a number here; never call str methods on it raw.
    if value is None:
        return ""
    return str(value).strip().lower()


def _coerce_relevance(value: Any) -> int:
    """Convert a raw ``relevance_score`` to an int clamped to 0-100."""
    # bool is an int subclass; a true/false score carries no information.
    if isinstance(value, bool):
        return _DEFAULT_RELEVANCE
    if isinstance(value, str):
        try:
            value = float(value.strip())
        except ValueError:
            return _DEFAULT_RELEVANCE
    if not isinstance(value, (int, float)):
        return _DEFAULT_RELEVANCE
    # json.loads accepts the NaN literal; int(nan) would raise ValueError.
    if isinstance(value, float) and math.isnan(value):
        return _DEFAULT_RELEVANCE
    # Clamp before truncating so +/-Infinity and huge ints cannot overflow.
    return int(max(0, min(100, value)))


def _coerce_bool(value: Any) -> bool:
    """Convert a raw ``exclude_recommendation`` value to a bool."""
    # bool("false") is True, so string flags must be compared by spelling.
    if isinstance(value, str):
        return value.strip().lower() in _TRUTHY_STRINGS
    return bool(value)


class IntentClassifier:
    """Search term intent classifier.

    LLM dependency is removed in mureo-core; no LLM calls are made here.
    Handles prompt generation and response parsing only.
    Classification via LLM should be done on the Managed side.
    """

    @staticmethod
    def build_prompt(
        search_terms: list[str],
        campaign_name: str = "",
        keywords: list[str] | None = None,
        strategic_context: str | None = None,
    ) -> str:
        """Generate a prompt for LLM classification.

        The actual LLM call is performed on the Managed side.

        Args:
            search_terms: Search terms to classify; rendered as a numbered list.
            campaign_name: Campaign name; a placeholder is used when empty.
            keywords: Registered keywords; only the first 20 are included.
            strategic_context: Optional persona/USP text appended under its
                own heading.

        Returns:
            The fully formatted prompt text.
        """
        terms_text = "\n".join(
            f"{number}. {term}" for number, term in enumerate(search_terms, start=1)
        )

        context_section = ""
        if strategic_context:
            context_section = f"\n### 戦略情報(ペルソナ・USP)\n{strategic_context}"

        return _CLASSIFY_PROMPT.format(
            campaign_name=campaign_name or "(未指定)",
            keywords=", ".join(keywords[:20]) if keywords else "(未指定)",
            search_terms=terms_text,
            strategic_context=context_section,
        )

    @staticmethod
    def parse_response(
        content: str, original_terms: list[str]
    ) -> list[SearchTermIntent]:
        """Parse an LLM response.

        Args:
            content: Raw LLM output; the JSON may be wrapped in a Markdown
                code fence.
            original_terms: The search terms that were sent for classification.

        Returns:
            One ``SearchTermIntent`` per entry of ``original_terms``, in the
            same order. Terms are matched to LLM items case-insensitively
            after stripping whitespace. Missing or malformed fields fall back
            to defaults (informational intent, relevance 50, no exclusion).
            If the JSON cannot be parsed at all, every term gets the default
            classification.
        """
        text = _extract_json_text(content)

        try:
            data = json.loads(text)
        except ValueError as exc:  # json.JSONDecodeError subclasses ValueError
            logger.warning("Failed to parse intent classification JSON: %s", exc)
            return [
                SearchTermIntent(
                    search_term=term,
                    intent=INTENT_INFORMATIONAL,
                    relevance_score=_DEFAULT_RELEVANCE,
                    reasoning=f"Default classification due to parse failure: {exc}",
                    exclude_recommendation=False,
                )
                for term in original_terms
            ]

        # A single object is treated as a one-element array.
        if not isinstance(data, list):
            data = [data]

        # Match LLM output against original term list
        result_map: dict[str, dict[str, Any]] = {
            _normalize_term(item.get("search_term")): item
            for item in data
            if isinstance(item, dict)
        }

        results: list[SearchTermIntent] = []
        for term in original_terms:
            item = result_map.get(_normalize_term(term), {})

            intent_raw = str(item.get("intent", INTENT_INFORMATIONAL)).strip().lower()
            intent = intent_raw if intent_raw in VALID_INTENTS else INTENT_INFORMATIONAL

            # A JSON null would otherwise be rendered as the text "None".
            reasoning_raw = item.get("reasoning")
            reasoning = (
                _DEFAULT_REASONING if reasoning_raw is None else str(reasoning_raw)
            )

            results.append(
                SearchTermIntent(
                    search_term=term,
                    intent=intent,
                    relevance_score=_coerce_relevance(
                        item.get("relevance_score", _DEFAULT_RELEVANCE)
                    ),
                    reasoning=reasoning,
                    exclude_recommendation=_coerce_bool(
                        item.get("exclude_recommendation", False)
                    ),
                )
            )

        return results