/ src / solace_agent_mesh / agent / tools / web_search_tools.py
web_search_tools.py
  1  """
  2  Web Search Tools for Solace Agent Mesh
  3  Provides web search capabilities using Google Custom Search API.
  4  
  5  For other search providers (e.g., Exa, Brave, Tavily), please use the corresponding
  6  plugins from the solace-agent-mesh-plugins repository.
  7  """
  8  
import logging
from datetime import datetime, timezone
from typing import Any, Dict, Optional, Union
from urllib.parse import quote, urlparse

from google.adk.tools import ToolContext

from ...tools.web_search import GoogleSearchTool, SearchResult
from .tool_definition import BuiltinTool
from .registry import tool_registry
from ...common.rag_dto import create_rag_source, create_rag_search_result
 18  
 19  log = logging.getLogger(__name__)
 20  
 21  CATEGORY_NAME = "web_search"
 22  CATEGORY_DESCRIPTION = "Tools for searching the web and retrieving current information"
 23  
 24  # State key for tracking search turns within a task/session
 25  _SEARCH_TURN_STATE_KEY = "web_search_turn_counter"
 26  
 27  
 28  def _get_next_search_turn(tool_context: Optional[ToolContext]) -> int:
 29      """
 30      Get the next search turn number using tool context state.
 31      
 32      This approach stores the turn counter in the tool context state, which is:
 33      - Per-task/session scoped (not global)
 34      - Automatically cleaned up when the task ends
 35      
 36      Each search within a task gets a unique turn number, so citations from
 37      different searches never collide (e.g., s0r0, s0r1 for first search,
 38      s1r0, s1r1 for second search).
 39      """
 40      if not tool_context:
 41          # Fallback: return 0 if no context (shouldn't happen in practice)
 42          log.warning("[web_search] No tool_context provided, using turn=0")
 43          return 0
 44      
 45      # Get current turn from state, defaulting to 0
 46      current_turn = tool_context.state.get(_SEARCH_TURN_STATE_KEY, 0)
 47      
 48      # Increment for next search
 49      tool_context.state[_SEARCH_TURN_STATE_KEY] = current_turn + 1
 50      
 51      return current_turn
 52  
 53  
 54  async def web_search_google(
 55      query: str,
 56      max_results: int = 5,
 57      search_type: Optional[str] = None,
 58      date_restrict: Optional[str] = None,
 59      safe_search: Optional[str] = None,
 60      tool_context: ToolContext = None,
 61      tool_config: Optional[Dict[str, Any]] = None,
 62      **kwargs
 63  ) -> str:
 64      """
 65      Search the web using Google Custom Search API.
 66      
 67      Args:
 68          query: The search query string
 69          max_results: Maximum number of results to return (1-10)
 70          search_type: Set to 'image' for image search
 71          date_restrict: Restrict results by recency (e.g., 'd7' for last 7 days)
 72          safe_search: Safe search level - 'off', 'medium', or 'high'
 73          tool_context: ADK tool context
 74          tool_config: Tool configuration containing API keys
 75          
 76      Returns:
 77          JSON string containing search results with sources for citation
 78      """
 79      log_identifier = "[web_search_google]"
 80      
 81      try:
 82          config = tool_config or {}
 83          api_key = config.get("google_search_api_key")
 84          search_engine_id = config.get("google_cse_id")
 85          
 86          if not api_key or not search_engine_id:
 87              error_msg = "google_search_api_key or google_cse_id not configured in tool_config"
 88              log.error("%s %s", log_identifier, error_msg)
 89              return f"Error: {error_msg}"
 90          
 91          tool = GoogleSearchTool(
 92              api_key=api_key,
 93              search_engine_id=search_engine_id
 94          )
 95          
 96          result: SearchResult = await tool.search(
 97              query=query,
 98              max_results=max_results,
 99              search_type=search_type,
100              date_restrict=date_restrict,
101              safe_search=safe_search,
102              **kwargs
103          )
104          
105          if not result.success:
106              log.error("%s Search failed: %s", log_identifier, result.error)
107              return f"Error: {result.error}"
108          
109          # Get unique search turn for this search to prevent citation ID collisions
110          # Uses tool context state (per-task scoped, automatically cleaned up)
111          search_turn = _get_next_search_turn(tool_context)
112          citation_prefix = f"s{search_turn}r"  # e.g., s0r0, s0r1 for first search; s1r0, s1r1 for second
113          
114          log.info(
115              "%s Search successful: %d results, %d images (turn=%d, citation_prefix=%s)",
116              log_identifier,
117              len(result.organic),
118              len(result.images),
119              search_turn,
120              citation_prefix
121          )
122          
123          rag_sources = []
124          valid_citation_ids = []
125          
126          # Log citation-to-source mapping for debugging
127          log.debug("%s === CITATION TO SOURCE MAPPING (turn %d) ===", log_identifier, search_turn)
128          
129          for i, source in enumerate(result.organic):
130              citation_id = f"{citation_prefix}{i}"
131              valid_citation_ids.append(citation_id)
132              
133              # Log each citation mapping at debug level
134              log.debug(
135                  "%s Citation [[cite:%s]] -> URL: %s | Title: %s",
136                  log_identifier,
137                  citation_id,
138                  source.link,
139                  source.title[:50] if source.title else "N/A"
140              )
141              
142              rag_source = create_rag_source(
143                  citation_id=citation_id,
144                  file_id=f"web_search_{search_turn}_{i}",
145                  filename=source.attribution or source.title,
146                  title=source.title,
147                  source_url=source.link,
148                  url=source.link,
149                  content_preview=source.snippet,
150                  relevance_score=1.0,
151                  source_type="web",
152                  retrieved_at=datetime.now(timezone.utc).isoformat(),
153                  metadata={
154                      "title": source.title,
155                      "link": source.link,
156                      "type": "web_search",
157                      "favicon": f"https://www.google.com/s2/favicons?domain={source.link}&sz=32" if source.link else ""
158                  }
159              )
160              rag_sources.append(rag_source)
161          
162          log.debug("%s === END CITATION MAPPING ===", log_identifier)
163          log.debug("%s Valid citation IDs for this search: %s", log_identifier, valid_citation_ids)
164          
165          for i, image in enumerate(result.images):
166              image_citation_id = f"img{search_turn}r{i}"
167              image_source = create_rag_source(
168                  citation_id=image_citation_id,
169                  file_id=f"web_search_image_{search_turn}_{i}",
170                  filename=image.title or f"Image {i+1}",
171                  title=image.title,
172                  source_url=image.link,
173                  url=image.link,
174                  content_preview=image.title or "",
175                  relevance_score=1.0,
176                  source_type="image",
177                  retrieved_at=datetime.now(timezone.utc).isoformat(),
178                  metadata={
179                      "title": image.title,
180                      "link": image.link,
181                      "imageUrl": image.imageUrl,
182                      "type": "image",
183                  }
184              )
185              rag_sources.append(image_source)
186          
187          rag_metadata = create_rag_search_result(
188              query=query,
189              search_type="web_search",
190              timestamp=datetime.now(timezone.utc).isoformat(),
191              sources=rag_sources
192          )
193          
194          # Build a formatted result string that clearly associates each citation ID with its content
195          # This helps the LLM correctly match citations to facts
196          formatted_results = []
197          formatted_results.append(f"=== SEARCH RESULTS (Turn {search_turn}) ===")
198          formatted_results.append(f"Query: {query}")
199          formatted_results.append(f"Valid citation IDs: {', '.join(valid_citation_ids)}")
200          formatted_results.append("")
201          
202          for i, source in enumerate(result.organic):
203              citation_id = f"{citation_prefix}{i}"
204              formatted_results.append(f"--- RESULT {i+1} ---")
205              formatted_results.append(f"CITATION ID: [[cite:{citation_id}]]")
206              formatted_results.append(f"TITLE: {source.title}")
207              formatted_results.append(f"URL: {source.link}")
208              formatted_results.append(f"CONTENT: {source.snippet}")
209              formatted_results.append(f"USE [[cite:{citation_id}]] to cite facts from THIS result only")
210              formatted_results.append("")
211          
212          formatted_results.append("=== END SEARCH RESULTS ===")
213          formatted_results.append("")
214          formatted_results.append("IMPORTANT: Each citation ID is UNIQUE to its result.")
215          formatted_results.append("Only use a citation ID for facts that appear in THAT specific result's CONTENT.")
216          
217          return {
218              "result": result.model_dump_json(),
219              "formatted_results": "\n".join(formatted_results),
220              "rag_metadata": rag_metadata,
221              "valid_citation_ids": valid_citation_ids,
222              "num_results": len(result.organic),
223              "search_turn": search_turn
224          }
225          
226      except Exception as e:
227          log.exception("%s Unexpected error in Google search: %s", log_identifier, e)
228          return f"Error executing Google search: {str(e)}"
229  
230  
# Text shown to the model describing when and how to use the tool.
_WEB_SEARCH_GOOGLE_DESCRIPTION = (
    "Search the web using Google Custom Search API. "
    "Use this when you need up-to-date information from Google. "
    "Always cite text sources using the citation format provided in your instructions. "
    "IMPORTANT: Image results will be displayed automatically in the UI - do NOT cite images, do NOT mention image URLs, and do NOT use citation markers like [[cite:imageX]] for images in your response text."
)

# JSON-schema fragment for each argument the model may pass to the tool.
_WEB_SEARCH_GOOGLE_PROPERTIES = {
    "query": {
        "type": "string",
        "description": "The search query",
    },
    "max_results": {
        "type": "integer",
        "description": "Maximum number of results (1-10)",
        "minimum": 1,
        "maximum": 10,
        "default": 5,
    },
    "search_type": {
        "type": "string",
        "enum": ["image"],
        "description": "Set to 'image' for image search",
    },
    "date_restrict": {
        "type": "string",
        "description": "Restrict results by recency (e.g., 'd7' for last 7 days)",
    },
    "safe_search": {
        "type": "string",
        "enum": ["off", "medium", "high"],
        "description": "Safe search level",
    },
}

# Definition of the Google web search tool; only ``query`` is mandatory.
web_search_google_tool_def = BuiltinTool(
    name="web_search_google",
    implementation=web_search_google,
    description=_WEB_SEARCH_GOOGLE_DESCRIPTION,
    category=CATEGORY_NAME,
    category_description=CATEGORY_DESCRIPTION,
    required_scopes=["tool:web_search:execute"],
    parameters={
        "type": "object",
        "properties": _WEB_SEARCH_GOOGLE_PROPERTIES,
        "required": ["query"],
    },
)

# Registration and the two log lines below run when this module is imported.
tool_registry.register(web_search_google_tool_def)

log.info("Web search tools registered: web_search_google")
log.info("Note: For Exa, Brave, and Tavily search, use plugins from solace-agent-mesh-plugins")